xref: /aosp_15_r20/external/toolchain-utils/cros_utils/buildbot_utils.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1# Copyright 2017 The ChromiumOS Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Utilities for launching and accessing ChromeOS buildbots."""
6
7
8import ast
9import json
10import os
11import re
12import time
13
14from cros_utils import command_executer
15from cros_utils import logger
16
17
# Seconds to wait before polling the buildbot for the first time (2 hours).
INITIAL_SLEEP_TIME = 2 * 60 * 60
# Seconds between successive polls of the buildbot (10 minutes).
SLEEP_TIME = 10 * 60

# Some of the slower builders (llvm-next) take more than 12 hours, so a
# build is only declared dead (or never going to finish) after 15 hours.
TIME_OUT = 15 * 60 * 60
24
25
class BuildbotTimeout(Exception):
    """Raised when a buildbot operation does not finish in time."""
28
29
def RunCommandInPath(path, cmd):
    """Run a command with `path` as the current working directory.

    The process working directory is switched to `path` while the command
    runs and is restored afterwards, even if running the command raises.

    Args:
        path: directory to change into before running the command.
        cmd: the command to run.

    Returns:
        (status, stdout, stderr) as returned by the command executer's
        RunCommandWOutput.
    """
    ce = command_executer.GetCommandExecuter()
    original_cwd = os.getcwd()
    os.chdir(path)
    try:
        status, stdout, stderr = ce.RunCommandWOutput(
            cmd, print_to_console=False
        )
    finally:
        # Always return to the caller's directory so a failing command
        # cannot leave the process stranded in `path`.
        os.chdir(original_cwd)
    return status, stdout, stderr
37
38
def PeekTrybotImage(chromeos_root, buildbucket_id):
    """Query `cros buildresult` for the state of a tryjob.

    Args:
        chromeos_root: root dir of chrome os checkout
        buildbucket_id: buildbucket-id

    Returns:
        (status, url) where status can be 'pass', 'fail', 'running',
                    and url looks like:
        gs://chromeos-image-archive/trybot-elm-release-tryjob/R67-10468.0.0-b20789
        url is None when the query command fails (reported as 'running'),
        and "" when the report has no artifacts URL.
    """
    query = "cros buildresult --report json --buildbucket-id %s" % (
        buildbucket_id
    )
    exit_code, report, _ = RunCommandInPath(chromeos_root, query)

    if exit_code != 0:
        # `cros buildresult` currently exits non-zero while the job is
        # still running, so a failed query is treated as "running".
        return ("running", None)

    entry = json.loads(report)[buildbucket_id]
    artifacts_url = entry["artifacts_url"]

    if artifacts_url is not None:
        return (entry["status"], artifacts_url.rstrip("/"))

    # The tryjob failed to launch correctly, so there are no artifacts.
    return (entry["status"], "")
68
69
def ParseTryjobBuildbucketId(msg):
    """Find the buildbucket-id in the messages from `cros tryjob`.

    The output is parsed as a Python literal and is expected to be a list
    whose first element is a dict holding a "buildbucket_id" entry.

    Args:
        msg: messages from `cros tryjob`

    Returns:
        buildbucket-id, which will be passed to `cros buildresult`, or None
        if the output is empty, cannot be parsed, or contains no id.
    """
    try:
        output_list = ast.literal_eval(msg)
        output_dict = output_list[0]
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
        # A failed launch typically yields empty or non-literal output.
        # Report "no id" so the caller can emit its own error instead of
        # crashing here with a parse exception.
        return None
    if not isinstance(output_dict, dict):
        return None
    return output_dict.get("buildbucket_id")
84
85
def SubmitTryjob(
    chromeos_root,
    buildbot_name,
    patch_list,
    tryjob_flags=None,
    build_toolchain=False,
):
    """Calls `cros tryjob ...`

    Args:
        chromeos_root: the path to the ChromeOS root, needed for finding chromite
            and launching the buildbot.
        buildbot_name: the name of the buildbot queue, such as lumpy-release or
            daisy-paladin.
        patch_list: a python list of the patches, if any, for the buildbot to use.
        tryjob_flags: See cros tryjob --help for available options. The
            passed-in list is not modified.
        build_toolchain: builds and uses the latest toolchain, rather than the
            prebuilt one in SDK.

    Returns:
        buildbucket id
    """
    # Each patch becomes its own " -g <patch>" argument.
    patch_arg = "".join(" -g " + repr(p) for p in patch_list or [])

    # Work on a copy: appending to the caller's list would leak
    # "--latest-toolchain" into later calls that reuse the same list.
    flags = list(tryjob_flags) if tryjob_flags else []
    if build_toolchain:
        flags.append("--latest-toolchain")
    flags_arg = " ".join(flags)

    # Launch buildbot with appropriate flags.
    command = "cros_sdk -- cros tryjob --yes --json --nochromesdk  %s %s %s" % (
        flags_arg,
        patch_arg,
        buildbot_name,
    )
    print("CMD: %s" % command)
    _, out, _ = RunCommandInPath(chromeos_root, command)
    buildbucket_id = ParseTryjobBuildbucketId(out)
    print("buildbucket_id: %s" % repr(buildbucket_id))
    if not buildbucket_id:
        logger.GetLogger().LogFatal(
            "Error occurred while launching trybot job: %s" % command
        )
    return buildbucket_id
134
135
def GetTrybotImage(
    chromeos_root,
    buildbot_name,
    patch_list,
    tryjob_flags=None,
    build_toolchain=False,
    asynchronous=False,
):
    """Launch buildbot and get resulting trybot artifact name.

    This function launches a buildbot with the appropriate flags to
    build the test ChromeOS image, with the current ToT mobile compiler.
    After an initial wait of INITIAL_SLEEP_TIME seconds it polls every
    SLEEP_TIME seconds to see if the trybot has finished.  When the trybot
    has finished, it takes the artifact URL from the build result (if one
    was created) and returns the artifact name.

    Args:
        chromeos_root: the path to the ChromeOS root, needed for finding chromite
            and launching the buildbot.
        buildbot_name: the name of the buildbot queue, such as lumpy-release or
            daisy-paladin.
        patch_list: a python list of the patches, if any, for the buildbot to use.
        tryjob_flags: See cros tryjob --help for available options.
        build_toolchain: builds and uses the latest toolchain, rather than the
                       prebuilt one in SDK.
        asynchronous: don't wait for artifacts; just return the buildbucket id
            (paired with " " in place of the image).

    Returns:
        (buildbucket id, partial image url) e.g.
        (8952271933586980528, trybot-elm-release-tryjob/R67-10480.0.0-b2373596)
        The image is "" when no usable trybot image was generated.
    """
    buildbucket_id = SubmitTryjob(
        chromeos_root, buildbot_name, patch_list, tryjob_flags, build_toolchain
    )
    if asynchronous:
        return buildbucket_id, " "

    # The trybot generally takes more than 2 hours to finish.
    # Wait two hours before polling the status.
    time.sleep(INITIAL_SLEEP_TIME)
    elapsed = INITIAL_SLEEP_TIME
    while True:
        status, image = PeekTrybotImage(chromeos_root, buildbucket_id)
        if status != "running":
            break
        if elapsed > TIME_OUT:
            # NOTE(review): this relies on LogFatal terminating the process;
            # otherwise polling continues past the timeout -- confirm.
            logger.GetLogger().LogFatal(
                "Unable to get build result for target %s." % buildbot_name
            )
        else:
            wait_msg = "Unable to find build result; job may be running."
            logger.GetLogger().LogOutput(wait_msg)
        logger.GetLogger().LogOutput(f"{elapsed / 60} minutes elapsed.")
        logger.GetLogger().LogOutput(f"Sleeping {SLEEP_TIME} seconds.")
        time.sleep(SLEEP_TIME)
        elapsed += SLEEP_TIME

    if not buildbot_name.endswith("-toolchain") and status == "fail":
        # For rotating testers, we don't care about their status
        # result, because if any HWTest failed it will be non-zero.
        #
        # The nightly performance tests do not run HWTests, so if
        # their status is non-zero, we do care.  In this case
        # non-zero means the image itself probably did not build.
        image = ""

    if not image:
        logger.GetLogger().LogError(
            "Trybot job (buildbucket id: %s) failed with "
            "status %s; no trybot image generated. " % (buildbucket_id, status)
        )
    else:
        # Convert full gs path to what crosperf expects. For example, convert
        # gs://chromeos-image-archive/trybot-elm-release-tryjob/R67-10468.0.0-b20789
        # to
        # trybot-elm-release-tryjob/R67-10468.0.0-b20789
        image = "/".join(image.split("/")[-2:])

    logger.GetLogger().LogOutput("image is '%s'" % image)
    logger.GetLogger().LogOutput("status is %s" % status)
    return buildbucket_id, image
220
221
def DoesImageExist(chromeos_root, build):
    """Report whether the test image archive for `build` can be listed.

    Runs `gsutil ls` on the build's chromiumos_test_image.tar.xz inside the
    chroot and returns True when that command's result is falsy (zero).
    """

    executer = command_executer.GetCommandExecuter()
    template = (
        "gsutil ls gs://chromeos-image-archive/%s/chromiumos_test_image.tar.xz"
    )
    listing_cmd = template % (build)
    exit_status = executer.ChrootRunCommand(
        chromeos_root, listing_cmd, print_to_console=False
    )
    return not exit_status
232
233
def WaitForImage(chromeos_root, build):
    """Poll until the image for `build` exists or TIME_OUT seconds pass.

    Args:
        chromeos_root: root dir of the ChromeOS checkout.
        build: build path under the image archive bucket.

    Raises:
        BuildbotTimeout: the image did not appear within TIME_OUT seconds.
    """

    waited = 0
    while waited < TIME_OUT:
        if not DoesImageExist(chromeos_root, build):
            not_ready_msg = "Image %s not ready, waiting for 10 minutes" % build
            logger.GetLogger().LogOutput(not_ready_msg)
            time.sleep(SLEEP_TIME)
            waited += SLEEP_TIME
            continue
        return

    hours_waited = TIME_OUT / 3600
    logger.GetLogger().LogOutput(
        "Image %s not found, waited for %d hours" % (build, hours_waited)
    )
    raise BuildbotTimeout("Timeout while waiting for image %s" % build)
251
252
def GetLatestImage(chromeos_root, path):
    """Get the newest build under `path` whose test image exists.

    Lists gs://chromeos-image-archive/<path>, keeps the entries whose
    directory name starts with R<n>-<n>.<n>.<n>, and checks them from the
    highest version down.

    Args:
        chromeos_root: root dir of the ChromeOS checkout.
        path: path under the image archive bucket to list.

    Returns:
        "<path>/R<n>-<n>.<n>.<n>" for the newest build whose image exists,
        or None if there is no such build.

    Raises:
        RuntimeError: the bucket listing command failed.
    """

    # The dots are escaped so they only match a literal "." separator.
    fmt = re.compile(r"R([0-9]+)-([0-9]+)\.([0-9]+)\.([0-9]+)")

    ce = command_executer.GetCommandExecuter()
    command = "gsutil ls gs://chromeos-image-archive/%s" % path
    ret, out, _ = ce.ChrootRunCommandWOutput(
        chromeos_root, command, print_to_console=False
    )
    if ret != 0:
        raise RuntimeError("Failed to list buckets with command: %s." % command)

    versions = []
    for entry in out.split():
        parts = entry.split("/")
        if len(parts) < 2:
            # Not a ".../<name>/" style entry; nothing to parse.
            continue
        match = fmt.match(parts[-2])
        if match:
            versions.append([int(g) for g in match.groups()])

    # Lists of ints compare element-wise, so this orders newest first.
    versions.sort(reverse=True)
    for version in versions:
        build = "%s/R%d-%d.%d.%d" % (path, *version)
        if DoesImageExist(chromeos_root, build):
            return build
    return None
275
276
def GetLatestRecipeImage(chromeos_root, path):
    """Get latest nightly test image from recipe bucket.

    Image location example:
    $ARCHIVE/lulu-llvm-next-nightly/R84-13037.0.0-31011-8883172717979984032

    Args:
        chromeos_root: root dir of the ChromeOS checkout.
        path: path under the image archive bucket to list.

    Returns:
        "<path>/<directory name>" for the newest of the two most recent
        builds whose image exists, or None if neither has one.

    Raises:
        RuntimeError: the bucket listing command failed.
    """

    # The dots are escaped so they only match a literal "." separator.
    fmt = re.compile(r"R([0-9]+)-([0-9]+)\.([0-9]+)\.([0-9]+)-([0-9]+)")

    ce = command_executer.GetCommandExecuter()
    command = "gsutil ls gs://chromeos-image-archive/%s" % path
    ret, out, _ = ce.ChrootRunCommandWOutput(
        chromeos_root, command, print_to_console=False
    )
    if ret != 0:
        raise RuntimeError("Failed to list buckets with command: %s." % command)

    candidates = []
    for entry in out.split():
        parts = entry.split("/")
        if len(parts) < 2:
            # Not a ".../<name>/" style entry; nothing to parse.
            continue
        name = parts[-2]
        match = fmt.match(name)
        # Skip entries that do not look like a recipe build directory;
        # calling .group() on a failed match would raise.
        if match:
            candidates.append(([int(g) for g in match.groups()], name))

    candidates.sort(key=lambda x: x[0], reverse=True)
    # Only try the two newest images, i.e. roughly the last two days of
    # nightly builds.
    for _, name in candidates[:2]:
        build = "%s/%s" % (path, name)
        if DoesImageExist(chromeos_root, build):
            return build
    return None
307