xref: /aosp_15_r20/external/autotest/client/common_lib/cros/dev_server.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6from __future__ import absolute_import
7from __future__ import division
8from __future__ import print_function
9
10from distutils import version
11import json
12import logging
13import multiprocessing
14import os
15import re
16import shutil
17import subprocess
18from threading import Timer
19import six
20from six.moves import urllib
21import six.moves.html_parser
22import six.moves.http_client
23import six.moves.urllib.parse
24import time
25
26from autotest_lib.client.bin import utils as bin_utils
27from autotest_lib.client.common_lib import android_utils
28from autotest_lib.client.common_lib import error
29from autotest_lib.client.common_lib import global_config
30from autotest_lib.client.common_lib import seven
31from autotest_lib.client.common_lib import utils
32from autotest_lib.client.common_lib.cros import retry
33
34
35try:
36    from autotest_lib.utils.frozen_chromite.lib import metrics
37except ImportError:
38    metrics = utils.metrics_mock
39
40
# Module-level shorthand for the autotest global configuration object.
CONFIG = global_config.global_config
# This file is generated at build time and specifies, per suite and per test,
# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'
# Number of seconds the caller waits between polls of the devserver's
# is_staged call when checking whether artifacts are staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5
# Comma-separated artifacts that should be staged when a client calls the
# devserver RPC to stage an image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
# Comma-separated artifacts that should be staged when a client calls the
# devserver RPC to stage an image together with the autotest artifacts.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
                                                   'control_files,stateful,'
                                                   'autotest_packages')
# NOTE(review): a comment describing artifacts staged for an Android build
# used to sit here, but no such constant follows it in this part of the file.
# Whether the devserver health checks are skipped; read from the
# 'skip_devserver_health_check' option of the CROS config section.
SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)
# Number of seconds for the call to get devserver load to time out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0
67
# Android build name pattern used for artifact paths on the devserver. It is
# read from the CROS config section and all backslashes are removed from it.
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Return value from a devserver RPC indicating the call succeeded.
SUCCESS = 'Success'

# Timeout, in minutes, for a single devserver ssh call.
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Error messages that mark an invalid devserver response or a devserver that
# is down.
ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'

# Substring of an error message that marks a devserver call as timed out.
ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'

# Timeout, in minutes, for waiting on a devserver staging call; used as the
# default retry timeout of remote_devserver_call.
DEVSERVER_IS_STAGING_RETRY_MIN = 100
87
# Provision error patterns.
# Each entry is a (regular expression, classification string) pair.
# People who see this should know that they shouldn't change these
# classification strings. These strings are used for monitoring provision
# failures. Any changes may mess up the stats.
_EXCEPTION_PATTERNS = [
        # Raised when devserver portfile does not exist on host.
        (r".*Devserver portfile does not exist!.*$",
         '(1) Devserver portfile does not exist on host'),
        # Raised when devserver cannot copy packages to host.
        (r".*Could not copy .* to device.*$",
         '(2) Cannot copy packages to host'),
        # Raised when devserver fails to run specific commands on host.
        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
         '(3) Fail to run specific command on host'),
        # Raised when new build fails to boot on the host.
        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
         '(4) Build failed to boot on host'),
        # Raised when the auto-update process is timed out.
        (r'.*The CrOS auto-update process is timed out, '
         'thus will be terminated.*$',
         '(5) Auto-update is timed out'),
        # Raised when the host is not pingable.
        (r".*DeviceNotPingableError.*$",
         '(6) Host is not pingable during auto-update'),
        # Raised when hosts have unexpected status after rootfs update.
        (r'.*Update failed with unexpected update status: '
         'UPDATE_STATUS_IDLE.*$',
         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
         'update'),
        # Raised when devserver returns non-json response to shard/drone.
        (r'.*No JSON object could be decoded.*$',
         '(8) Devserver returned non-json object'),
        # Raised when devserver loses host's ssh connection
        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
         "(9) Devserver lost host's ssh connection"),
        # Raised when error happens in writing files to host
        (r'.*Write failed\: Broken pipe.*$',
         "(10) Broken pipe while writing or connecting to host")]

# Whether a devserver in the same subnet as the DUT is preferred; this is the
# default for the prefer_local_devserver argument of
# DevServer.get_available_devservers. Defaults to False when not configured.
PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'prefer_local_devserver', type=bool, default=False)

# Read from the 'enable_ssh_connection_for_devserver' option of the CROS
# config section; defaults to False when not configured.
ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
        default=False)

# Default number of subnet mask bits used when looking for devservers in the
# same subnet as a DUT.
DEFAULT_SUBNET_MASKBIT = 19
135
136
class DevServerException(Exception):
    """Signals a non-200 HTTP response from the dev server."""
140
141
class DevServerOverloadException(Exception):
    """Signals a 502 HTTP response from the dev server."""
145
class DevServerFailToLocateException(Exception):
    """Signals that no devserver could be located."""
149
150
class MarkupStripper(six.moves.html_parser.HTMLParser):
    """HTML parser that strips HTML tags and coded characters like &amp;.

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal data structure.
    """
    def __init__(self):
        # Run the real base-class initializer instead of only reset(), so
        # that every attribute the parser sets up at construction time is
        # present. The explicit unbound call (rather than super()) is used
        # because the base class may be an old-style class under Python 2.
        six.moves.html_parser.HTMLParser.__init__(self)
        # Text fragments collected by handle_data(), in document order.
        self.fed = []
        # Kept explicit so character references reach handle_data() as text
        # even if the base initializer did not set this attribute.
        self.convert_charrefs = True


    def handle_data(self, d):
        """Consume content of text nodes, store it away.

        @param d: The text of one text node.
        """
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data.

        @return: A single string made of every text fragment seen so far.
        """
        return ''.join(self.fed)
171
172
def _strip_http_message(message):
    """Strip the HTML markup from an HTTP message.

    @param message: A string (text or bytes) returned by an HTTP call.

    @return: The text content of the message with the markup stripped.
    """
    strip = MarkupStripper()
    try:
        strip.feed(seven.ensure_text(message, 'utf_32'))
    except UnicodeDecodeError:
        # On Python 3 the parser only accepts text; feeding the raw bytes
        # would raise a TypeError that hides the devserver error being
        # reported. Decode leniently so the message is still readable.
        if six.PY3 and isinstance(message, six.binary_type):
            message = message.decode('utf-8', 'replace')
        strip.feed(message)
    return strip.get_data()
186
187
def _get_image_storage_server():
    """Get the image storage server location from the CROS config section.

    @return: The configured 'image_storage_server' value, ending with a
             trailing path separator.
    """
    server = CONFIG.get_config_value(
            'CROS', 'image_storage_server', type=str)
    # see b/203531740; joining with an empty component forces a trailing /
    # if it is not there yet.
    return os.path.join(server, '')
194
195
def _get_canary_channel_server():
    """Get the url of the canary-channel server.

    For example: gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The configured 'canary_channel_server' value, ending with a
             trailing path separator.
    """
    server = CONFIG.get_config_value(
            'CROS', 'canary_channel_server', type=str)
    # see b/203531740; joining with an empty component forces a trailing /
    # if it is not there yet.
    return os.path.join(server, '')
208
209
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    @param artifacts: A list of artifacts we need to stage.
    @return: The canary channel server when the configured factory artifact
             is among |artifacts|; the default image storage server
             otherwise, including when no artifacts are specified.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    wants_factory = (artifacts and factory_artifact
                     and factory_artifact in artifacts)
    if not wants_factory:
        return _get_image_storage_server()
    return _get_canary_channel_server()
222
223
224def _gs_or_local_archive_url_args(archive_url):
225    """Infer the devserver call arguments to use with the given archive_url.
226
227    @param archive_url: The archive url to include the in devserver RPC. This
228            can either e a GS path or a local path.
229    @return: A dict of arguments to include in the devserver call.
230    """
231    if not archive_url:
232        return {}
233    elif archive_url.startswith('gs://'):
234        return {'archive_url': archive_url}
235    else:
236        # For a local path, we direct the devserver to move the files while
237        # staging. This is the fastest way to stage local files, but deletes the
238        # files from the source. This is OK because the files are available on
239        # the devserver once staged.
240        return {
241                'local_path': archive_url,
242                'delete_source': True,
243        }
244
245
def _reverse_lookup_from_config(address):
    """Find the hostname configured for the given IP address.

    The lookup goes through the hostname-address map from the config file.

    If multiple hostnames map to the same IP address, the first one
    encountered while iterating the map is returned.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    hostname_map = _get_hostname_addr_map()
    matching_names = (name for name, mapped in six.iteritems(hostname_map)
                      if mapped == address)
    # Fall back to the input itself when no hostname maps to it.
    return next(matching_names, address)
261
262
def _get_hostname_addr_map():
    """Read the HOSTNAME_ADDR_MAP config section.

    @return: dict mapping server hostnames to addresses
    """
    hostname_addr_map = CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')
    return hostname_addr_map
269
270
def _get_dev_server_list():
    """Read the 'dev_server' option of the CROS config section.

    @return: The configured list of dev servers; an empty list if unset.
    """
    dev_servers = CONFIG.get_config_value(
            'CROS', 'dev_server', type=list, default=[])
    return dev_servers
273
274
def _get_crash_server_list():
    """Read the 'crash_server' option of the CROS config section.

    @return: The configured list of crash servers; an empty list if unset.
    """
    crash_servers = CONFIG.get_config_value(
            'CROS', 'crash_server', type=list, default=[])
    return crash_servers
278
279
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """Build a decorator for functions that make remote devserver calls.

    The decorated call turns any urllib HTTPError into a DevServerException
    whose message is the error body with its markup stripped. URLError,
    error.CmdError and DevServerOverloadException are handed to retry.retry
    to ride out devserver flakiness. When the exception that finally escapes
    mentions a timeout, a call_timeout metric is recorded before the
    exception is re-raised.

    @param timeout_min: Retry timeout in minutes, passed on to retry.retry.
    @param exception_to_raise: Exception class passed on to retry.retry.

    @return: A decorator to apply to the function making the call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        # Some callables have no __name__; they get no label.
        label = getattr(method, '__name__', None)

        def metrics_wrapper(*args, **kwargs):
            retriable = (urllib.error.URLError, error.CmdError,
                         DevServerOverloadException)

            @retry.retry(retriable,
                         timeout_min=timeout_min,
                         exception_to_raise=exception_to_raise,
                         label=label)
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib.error.HTTPError as http_error:
                    body = http_error.read()
                    raise DevServerException(_strip_http_message(body))

            try:
                return wrapper()
            except Exception as failure:
                if ERR_MSG_FOR_TIMED_OUT_CALL in str(failure):
                    # Work out which devserver the call was aimed at: either
                    # the DevServer instance the method is bound to, or an
                    # explicit 'devserver' keyword argument.
                    if args and isinstance(args[0], DevServer):
                        dev_server = args[0].hostname
                    elif 'devserver' in kwargs:
                        dev_server = get_hostname(kwargs['devserver'])
                    else:
                        dev_server = None

                    logging.debug('RPC call %s has timed out on devserver %s.',
                                  label, dev_server)
                    timeout_counter = metrics.Counter(
                            'chromeos/autotest/devserver/call_timeout')
                    # NOTE(review): the 'healthy' field carries the call
                    # label here; left untouched since it feeds monitoring.
                    timeout_counter.increment(
                            fields={'dev_server': dev_server,
                                    'healthy': label})

                raise

        return metrics_wrapper

    return inner_decorator
328
329
def get_hostname(url):
    """Extract the hostname portion of a URL.

    A URL has the form schema://hostname:port/path

    @param url: a Url string
    @return: a hostname string
    """
    parsed_url = six.moves.urllib.parse.urlparse(url)
    return parsed_url.hostname
339
340
def get_resolved_hostname(url):
    """Get the symbolic hostname for a url.

    The hostname part of `url` is looked up in the hostname map from the
    config file; if nothing maps to it (for example because it is already a
    symbolic name), it is returned unchanged.

    @param url  The URL with which to perform the conversion/lookup.
    """
    host = get_hostname(url)
    return _reverse_lookup_from_config(host)
350
351
class DevServer(object):
    """Base class for all DevServer-like server stubs.

    This is the base class for interacting with all Dev Server-like servers.
    A caller should instantiate a sub-class of DevServer with:

    host = SubClassServer.resolve(build)
    server = SubClassServer(host)
    """
    # Minimum free disk space, in GB, for a devserver to pass the disk check.
    _MIN_FREE_DISK_SPACE_GB = 20
    # Maximum number of live Apache clients for a devserver to pass the
    # Apache client count check.
    _MAX_APACHE_CLIENT_COUNT = 75
    # Threshold for the CPU load percentage for a devserver to be selected.
    MAX_CPU_LOAD = 80.0
    # Threshold for the network IO, set to 80MB/s
    MAX_NETWORK_IO = 1024 * 1024 * 80
    # Names of load metrics. FREE_DISK, AU_PROCESS and APACHE_CLIENT_COUNT are
    # used below as keys into the load dict returned by get_devserver_load;
    # presumably the others are keys of the same dict -- TODO confirm.
    DISK_IO = 'disk_total_bytes_per_second'
    NETWORK_IO = 'network_total_bytes_per_second'
    CPU_LOAD = 'cpu_percent'
    FREE_DISK = 'free_disk'
    AU_PROCESS = 'au_process_count'
    STAGING_THREAD_COUNT = 'staging_thread_count'
    APACHE_CLIENT_COUNT = 'apache_client_count'
374
375
    def __init__(self, devserver):
        """Remember which devserver this object talks to.

        @param devserver: url of the devserver, as later returned by url().
        """
        self._devserver = devserver
378
379
    def url(self):
        """Returns the url for this devserver.

        @return: The devserver value this object was constructed with.
        """
        return self._devserver
383
384
385    @property
386    def hostname(self):
387        """Return devserver hostname parsed from the devserver URL.
388
389        Note that this is likely parsed from the devserver URL from
390        shadow_config.ini, meaning that the "hostname" part of the
391        devserver URL is actually an IP address.
392
393        @return hostname string
394        """
395        return get_hostname(self.url())
396
397
398    @property
399    def resolved_hostname(self):
400        """Return devserver hostname, resolved from its IP address.
401
402        Unlike the hostname property, this property attempts to look up
403        the proper hostname from the devserver IP address.  If lookup
404        fails, then fall back to whatever the hostname property would
405        have returned.
406
407        @return hostname string
408        """
409        return _reverse_lookup_from_config(self.hostname)
410
411
412    @staticmethod
413    def get_server_url(url):
414        """Get the devserver url from a repo url, which includes build info.
415
416        @param url: A job repo url.
417
418        @return A devserver url, e.g., http://127.0.0.10:8080
419        """
420        res = six.moves.urllib.parse.urlparse(url)
421        if res.netloc:
422            return res.scheme + '://' + res.netloc
423
424
425    @classmethod
426    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
427        """A wrapper function to call get_devserver_load in parallel.
428
429        @param devserver: url of the devserver.
430        @param timeout_sec: Number of seconds before time out the devserver
431                            call.
432        @param output: An output queue to save results to.
433        """
434        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
435        if load:
436            load['devserver'] = devserver
437        output.put(load)
438
439
440    @classmethod
441    def get_devserver_load(cls, devserver,
442                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
443        """Returns True if the |devserver| is healthy to stage build.
444
445        @param devserver: url of the devserver.
446        @param timeout_min: How long to wait in minutes before deciding the
447                            the devserver is not up (float).
448
449        @return: A dictionary of the devserver's load.
450
451        """
452        call = cls._build_call(devserver, 'check_health')
453        @remote_devserver_call(timeout_min=timeout_min)
454        def get_load(devserver=devserver):
455            """Inner method that makes the call."""
456            return cls.run_call(call, timeout=timeout_min*60)
457
458        try:
459            return json.load(six.StringIO(get_load(devserver=devserver)))
460        except Exception as e:
461            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
462                          ' Error: %s', call, timeout_min * 60, e)
463
464
465    @classmethod
466    def is_free_disk_ok(cls, load):
467        """Check if a devserver has enough free disk.
468
469        @param load: A dict of the load of the devserver.
470
471        @return: True if the devserver has enough free disk or disk check is
472                 skipped in global config.
473
474        """
475        if SKIP_DEVSERVER_HEALTH_CHECK:
476            logging.debug('devserver health check is skipped.')
477        elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB:
478            return False
479
480        return True
481
482
483    @classmethod
484    def is_apache_client_count_ok(cls, load):
485        """Check if a devserver has enough Apache connections available.
486
487        Apache server by default has maximum of 150 concurrent connections. If
488        a devserver has too many live connections, it likely indicates the
489        server is busy handling many long running download requests, e.g.,
490        downloading stateful partitions. It is better not to add more requests
491        to it.
492
493        @param load: A dict of the load of the devserver.
494
495        @return: True if the devserver has enough Apache connections available,
496                 or disk check is skipped in global config.
497
498        """
499        if SKIP_DEVSERVER_HEALTH_CHECK:
500            logging.debug('devserver health check is skipped.')
501        elif cls.APACHE_CLIENT_COUNT not in load:
502            logging.debug('Apache client count is not collected from devserver.')
503        elif (load[cls.APACHE_CLIENT_COUNT] >
504              cls._MAX_APACHE_CLIENT_COUNT):
505            return False
506
507        return True
508
509
    @classmethod
    def devserver_healthy(cls, devserver,
                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
        """Returns True if the |devserver| is healthy to stage build.

        The devserver's load is fetched once and then checked, in order, for
        availability, Apache client count and free disk space. Whatever the
        outcome, a devserver_healthy counter is incremented with the result
        and the failure reason, and the AU process count is reported when the
        load contains a non-zero value for it.

        @param devserver: url of the devserver.
        @param timeout_min: How long to wait in minutes before deciding the
                            the devserver is not up (float).

        @return: True if devserver is healthy. Return False otherwise.

        """
        c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy')
        # reason and healthy are read by the finally block below, so they
        # must be updated before each return.
        reason = ''
        healthy = False
        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
        try:
            if not load:
                # Failed to get the load of devserver.
                reason = '(1) Failed to get load.'
                return False

            apache_ok = cls.is_apache_client_count_ok(load)
            if not apache_ok:
                reason = '(2) Apache client count too high.'
                logging.error('Devserver check_health failed. Live Apache client '
                              'count is too high: %d.',
                              load[cls.APACHE_CLIENT_COUNT])
                return False

            disk_ok = cls.is_free_disk_ok(load)
            if not disk_ok:
                reason = '(3) Disk space too low.'
                logging.error('Devserver check_health failed. Free disk space is '
                              'low. Only %dGB is available.',
                              load[cls.FREE_DISK])
            # The disk check is the last one, so its result is the verdict.
            healthy = bool(disk_ok)
            return disk_ok
        finally:
            # Runs on every return path above, so each health check is
            # counted exactly once.
            c.increment(fields={'dev_server': cls(devserver).resolved_hostname,
                                'healthy': healthy,
                                'reason': reason})
            # Monitor how many AU processes the devserver is currently running.
            if load is not None and load.get(DevServer.AU_PROCESS):
                c_au = metrics.Gauge(
                        'chromeos/autotest/devserver/devserver_au_count')
                c_au.set(
                    load.get(DevServer.AU_PROCESS),
                    fields={'dev_server': cls(devserver).resolved_hostname})
559
560
561    @staticmethod
562    def _build_call(host, method, **kwargs):
563        """Build a URL to |host| that calls |method|, passing |kwargs|.
564
565        Builds a URL that calls |method| on the dev server defined by |host|,
566        passing a set of key/value pairs built from the dict |kwargs|.
567
568        @param host: a string that is the host basename e.g. http://server:90.
569        @param method: the dev server method to call.
570        @param kwargs: a dict mapping arg names to arg values.
571        @return the URL string.
572        """
573        # If the archive_url is a local path, the args expected by the devserver
574        # are a little different.
575        archive_url_args = _gs_or_local_archive_url_args(
576                kwargs.pop('archive_url', None))
577        kwargs.update(archive_url_args)
578        if 'is_async' in kwargs:
579            f = kwargs.pop('is_async')
580            kwargs['async'] = f
581        argstr = '&'.join(["%s=%s" % x for x in six.iteritems(kwargs)])
582        return "%(host)s/%(method)s?%(argstr)s" % dict(
583                host=host, method=method, argstr=argstr)
584
585
586    def build_call(self, method, **kwargs):
587        """Builds a devserver RPC string that is used by 'run_call()'.
588
589        @param method: remote devserver method to call.
590        """
591        return self._build_call(self._devserver, method, **kwargs)
592
593
594    @classmethod
595    def build_all_calls(cls, method, **kwargs):
596        """Builds a list of URLs that makes RPC calls on all devservers.
597
598        Build a URL that calls |method| on the dev server, passing a set
599        of key/value pairs built from the dict |kwargs|.
600
601        @param method: the dev server method to call.
602        @param kwargs: a dict mapping arg names to arg values
603
604        @return the URL string
605        """
606        calls = []
607        # Note we use cls.servers as servers is class specific.
608        for server in cls.servers():
609            if cls.devserver_healthy(server):
610                calls.append(cls._build_call(server, method, **kwargs))
611
612        return calls
613
614
615    @classmethod
616    def run_request(cls, call, timeout=None):
617        """Invoke a given devserver call using urllib.open.
618
619        Open the URL with HTTP, and return the text of the response. Exceptions
620        may be raised as for urllib2.urlopen().
621
622        @param call: a url string that calls a method to a devserver.
623        @param timeout: The timeout seconds for this urlopen call.
624
625        @return A HTTPResponse object.
626        """
627        if timeout is None:
628            return urllib.request.urlopen(call)
629        else:
630            return utils.urlopen_socket_timeout(call, timeout=timeout)
631
632    @classmethod
633    def run_call(cls, call, readline=False, timeout=None):
634        """Invoke a given devserver call using urllib.open.
635
636        Open the URL with HTTP, and return the text of the response. Exceptions
637        may be raised as for urllib2.urlopen().
638
639        @param call: a url string that calls a method to a devserver.
640        @param readline: whether read http response line by line.
641        @param timeout: The timeout seconds for this urlopen call.
642
643        @return the results of this call.
644        """
645        response = cls.run_request(call, timeout=timeout)
646        if readline:
647            return [line.rstrip() for line in response]
648        else:
649            return response.read()
650
651
    @staticmethod
    def servers():
        """Returns a list of servers that can serve as this type of server.

        The base class has no server list of its own; sub-classes are
        expected to override this.

        @raise NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
656
657
658    @classmethod
659    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
660                                      unrestricted_only=False):
661        """Get the devservers in the same subnet of the given ip.
662
663        @param ip: The IP address of a dut to look for devserver.
664        @param mask_bits: Number of mask bits. Default is 19.
665        @param unrestricted_only: Set to True to select from devserver in
666                unrestricted subnet only. Default is False.
667
668        @return: A list of devservers in the same subnet of the given ip.
669
670        """
671        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
672        # we need a dict to return the full devserver path once the IPs are
673        # filtered in get_servers_in_same_subnet.
674        server_names = {}
675        all_devservers = []
676        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
677                      else cls.servers())
678        for server in devservers:
679            server_name = get_hostname(server)
680            server_names[server_name] = server
681            all_devservers.append(server_name)
682        if not all_devservers:
683            devserver_type = 'unrestricted only' if unrestricted_only else 'all'
684            raise DevServerFailToLocateException(
685                'Fail to locate a devserver for dut %s in %s devservers'
686                % (ip, devserver_type))
687
688        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
689                                                      all_devservers)
690        return [server_names[s] for s in devservers]
691
692
693    @classmethod
694    def get_unrestricted_devservers(
695                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
696        """Get the devservers not in any restricted subnet specified in
697        restricted_subnets.
698
699        @param restricted_subnets: A list of restriected subnets.
700
701        @return: A list of devservers not in any restricted subnet.
702
703        """
704        if not restricted_subnets:
705            return cls.servers()
706
707        metrics.Counter('chromeos/autotest/devserver/unrestricted_hotfix')
708        return cls.servers()
709
710    @classmethod
711    def get_healthy_devserver(cls, build, devservers, ban_list=None):
712        """"Get a healthy devserver instance from the list of devservers.
713
714        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
715        @param devservers: The devserver list to be chosen out a healthy one.
716        @param ban_list: The ban_list of devservers we don't want to choose.
717                Default is None.
718
719        @return: A DevServer object of a healthy devserver. Return None if no
720                healthy devserver is found.
721
722        """
723        logging.debug('Pick one healthy devserver from %r', devservers)
724        while devservers:
725            hash_index = hash(build) % len(devservers)
726            devserver = devservers.pop(hash_index)
727            logging.debug('Check health for %s', devserver)
728            if ban_list and devserver in ban_list:
729                continue
730
731            if cls.devserver_healthy(devserver):
732                logging.debug('Pick %s', devserver)
733                return cls(devserver)
734
735
736    @classmethod
737    def get_available_devservers(cls,
738                                 hostname=None,
739                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
740                                 restricted_subnets=utils.ALL_SUBNETS):
741        """Get devservers in the same subnet of the given hostname.
742
743        @param hostname: Hostname of a DUT to choose devserver for.
744        @param prefer_local_devserver: A boolean indicating using a devserver in
745                                       the same subnet with the DUT.
746        @param restricted_subnets: A list of restricted subnets or p2p subnet
747                                   groups.
748
749        @return: A tuple of (devservers, can_retry), devservers is a list of
750                 devservers that's available for the given hostname. can_retry
751                 is a flag that indicate if caller can retry the selection of
752                 devserver if no devserver in the returned devservers can be
753                 used. For example, if hostname is in a restricted subnet,
754                 can_retry will be False.
755        """
756        logging.info('Getting devservers for host: %s',  hostname)
757        host_ip = None
758        if hostname:
759            host_ip = bin_utils.get_ip_address(hostname)
760            if not host_ip:
761                logging.error('Failed to get IP address of %s. Will pick a '
762                              'devserver without subnet constraint.', hostname)
763
764        if not host_ip:
765            return cls.get_unrestricted_devservers(restricted_subnets), False
766
767        # For the sake of backward compatibility, we use the argument
768        # 'restricted_subnets' to store both the legacy subnets (a tuple of
769        # (ip, mask)) and p2p subnets group (a list of subnets, i.e. [(ip,
770        # mask), ...]) data. For consistency, we convert all legacy subnets to
771        # a "singleton p2p subnets" and store them in a new list.
772        all_subnets = []
773        for s in restricted_subnets:
774            if isinstance(s, tuple):
775                all_subnets.append([s])
776            else:
777                all_subnets.append(s)
778
779        # Find devservers in the subnets reachable from the DUT.
780        if host_ip and all_subnets:
781            subnet_group = _get_subnet_group_for_host_ip(
782                    host_ip, all_subnets=all_subnets)
783            if subnet_group:
784                devservers = set()
785                for ip, mask in subnet_group:
786                    logging.debug(
787                            'The host %s (%s) is in a restricted subnet '
788                            '(or its peers). '
789                            'Try to locate devservers inside subnet '
790                            '%s/%d.', hostname, host_ip, ip, mask)
791                    devservers |= set(
792                            cls.get_devservers_in_same_subnet(ip, mask))
793                return sorted(devservers), False
794
795        # If prefer_local_devserver is set to True and the host is not in
796        # restricted subnet, pick a devserver in the same subnet if possible.
797        # Set can_retry to True so it can pick a different devserver if all
798        # devservers in the same subnet are down.
799        if prefer_local_devserver:
800            return (cls.get_devservers_in_same_subnet(
801                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
802
803        return cls.get_unrestricted_devservers(restricted_subnets), False
804
805
806    @classmethod
807    def resolve(cls, build, hostname=None, ban_list=None):
808        """"Resolves a build to a devserver instance.
809
810        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
811        @param hostname: The hostname of dut that requests a devserver. It's
812                         used to make sure a devserver in the same subnet is
813                         preferred.
814        @param ban_list: The ban_list of devservers shouldn't be chosen.
815
816        @raise DevServerException: If no devserver is available.
817        """
818        tried_devservers = set()
819        devservers, can_retry = cls.get_available_devservers(hostname)
820        if devservers:
821            tried_devservers |= set(devservers)
822
823        devserver = cls.get_healthy_devserver(build, devservers,
824                                              ban_list=ban_list)
825
826        if not devserver and can_retry:
827            # Find available devservers without dut location constrain.
828            devservers, _ = cls.get_available_devservers()
829            devserver = cls.get_healthy_devserver(build, devservers,
830                                                  ban_list=ban_list)
831            if devservers:
832                tried_devservers |= set(devservers)
833        if devserver:
834            return devserver
835        else:
836            subnet = 'unrestricted subnet'
837            if hostname is not None:
838                host_ip = bin_utils.get_ip_address(hostname)
839                if host_ip:
840                    subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip)
841                    subnet = '%s/%s' % (str(subnet_ip), str(mask_bits))
842
843            error_msg = ('All devservers in subnet: %s are currently down: '
844                         '%s. (dut hostname: %s)' %
845                         (subnet, tried_devservers, hostname))
846            logging.error(error_msg)
847            c = metrics.Counter(
848                    'chromeos/autotest/devserver/subnet_without_devservers')
849            c.increment(fields={'subnet': subnet, 'hostname': str(hostname)})
850            raise DevServerException(error_msg)
851
852
853    @classmethod
854    def random(cls):
855        """Return a random devserver that's available.
856
857        Devserver election in `resolve` method is based on a hash of the
858        build that a caller wants to stage. The purpose is that different
859        callers requesting for the same build can get the same devserver,
860        while the lab is able to distribute different builds across all
861        devservers. That helps to reduce the duplication of builds across
862        all devservers.
863        This function returns a random devserver, by passing a random
864        pseudo build name to `resolve `method.
865        """
866        return cls.resolve(build=str(time.time()))
867
868
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Return the crash servers given by _get_crash_server_list()."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Stage the debug symbols for |build| and, if that works, ask the
        devserver to symbolicate the dump at |minidump_path|.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace, or '' when the `requests`
                module cannot be imported.
        @raise urllib.error.HTTPError upon any return code that's not HTTP
               OK. NOTE(review): presumably the remote_devserver_call
               decorator turns this into a DevServerException - confirm.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        f = {'dev_server': self.resolved_hostname}
        c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump')
        c.increment(fields=f)
        # Symbolicate minidump.
        m = 'chromeos/autotest/crashserver/symbolicate_dump_duration'
        with metrics.SecondsTimer(m, fields=f):
            call = self.build_call('symbolicate_dump',
                                   archive_url=_get_image_storage_server() + build)
            # Open the minidump in a `with` block so the file handle is
            # closed as soon as the upload is done, even if the post raises.
            with open(minidump_path, 'rb') as minidump:
                request = requests.post(call, files={'minidump': minidump})
            if request.status_code == requests.codes.OK:
                return request.text

        # Non-OK status: surface the response body as an HTTPError.
        error_fd = six.StringIO(request.text)
        raise urllib.error.HTTPError(
                call, request.status_code, request.text, request.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Unused;
                         it defaults to None so the method can also be
                         called without arguments.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
927
928
929class ImageServerBase(DevServer):
930    """Base class for devservers used to stage builds.
931
932    CrOS and Android builds are staged in different ways as they have different
933    sets of artifacts. This base class abstracts the shared functions between
934    the two types of ImageServer.
935    """
936
937    @classmethod
938    def servers(cls):
939        """Returns a list of servers that can serve as a desired type of
940        devserver.
941        """
942        return _get_dev_server_list()
943
944
945    def _get_image_url(self, image):
946        """Returns the url of the directory for this image on the devserver.
947
948        @param image: the image that was fetched.
949        """
950        image = self.translate(image)
951        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
952                                              type=str)
953        return (url_pattern % (self.url(), image)).replace('update', 'static')
954
955
956    @staticmethod
957    def create_metadata(server_name, image, artifacts=None, files=None):
958        """Create a metadata dictionary given the staged items.
959
960        The metadata can be send to metadata db along with stats.
961
962        @param server_name: name of the devserver, e.g 172.22.33.44.
963        @param image: The name of the image.
964        @param artifacts: A list of artifacts.
965        @param files: A list of files.
966
967        @return A metadata dictionary.
968
969        """
970        metadata = {'devserver': server_name,
971                    'image': image,
972                    '_type': 'devserver'}
973        if artifacts:
974            metadata['artifacts'] = ' '.join(artifacts)
975        if files:
976            metadata['files'] = ' '.join(files)
977        return metadata
978
979
980    @classmethod
981    def run_ssh_call(cls, call, readline=False, timeout=None):
982        """Construct an ssh-based rpc call, and execute it.
983
984        @param call: a url string that calls a method to a devserver.
985        @param readline: whether read http response line by line.
986        @param timeout: The timeout seconds for ssh call.
987
988        @return the results of this call.
989        """
990        hostname = get_hostname(call)
991        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
992        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
993        try:
994            result = utils.run(ssh_call, timeout=timeout_seconds)
995        except error.CmdError as e:
996            logging.debug('Error occurred with exit_code %d when executing the '
997                          'ssh call: %s.', e.result_obj.exit_status,
998                          e.result_obj.stderr)
999            c = metrics.Counter('chromeos/autotest/devserver/ssh_failure')
1000            c.increment(fields={'dev_server': hostname})
1001            raise
1002        response = result.stdout
1003
1004        # If the curl command's returned HTTP response contains certain
1005        # exception string, raise the DevServerException of the response.
1006        if 'DownloaderException' in response:
1007            raise DevServerException(_strip_http_message(response))
1008
1009        if readline:
1010            # Remove line terminators and trailing whitespace
1011            response = response.splitlines()
1012            return [line.rstrip() for line in response]
1013
1014        return response
1015
1016
1017    @classmethod
1018    def run_call(cls, call, readline=False, timeout=None):
1019        """Invoke a given devserver call using urllib.open or ssh.
1020
1021        Open the URL with HTTP or SSH-based HTTP, and return the text of the
1022        response. Exceptions may be raised as for urllib2.urlopen() or
1023        utils.run().
1024
1025        @param call: a url string that calls a method to a devserver.
1026        @param readline: whether read http response line by line.
1027        @param timeout: The timeout seconds for urlopen call or ssh call.
1028
1029        @return the results of this call.
1030        """
1031        server_name = get_hostname(call)
1032        is_in_restricted_subnet = utils.get_restricted_subnet(
1033                server_name, utils.get_all_restricted_subnets())
1034        _EMPTY_SENTINEL_VALUE = object()
1035        def kickoff_call():
1036            """Invoke a given devserver call using urllib.open or ssh.
1037
1038            @param call: a url string that calls a method to a devserver.
1039            @param is_in_restricted_subnet: whether the devserver is in subnet.
1040            @param readline: whether read http response line by line.
1041            @param timeout: The timeout seconds for urlopen call or ssh call.
1042            """
1043            if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or
1044                not is_in_restricted_subnet):
1045                response = super(ImageServerBase, cls).run_call(
1046                        call, readline=readline, timeout=timeout)
1047            else:
1048                response = cls.run_ssh_call(
1049                        call, readline=readline, timeout=timeout)
1050
1051            # six.ensure_str would be nice, but its not in all the envs, so
1052            # this is what we are left with for now.
1053            if isinstance(response, bytes):
1054                response = response.decode()
1055            # Retry if devserver service is temporarily down, e.g. in a
1056            # devserver push.
1057            if ERR_MSG_FOR_DOWN_DEVSERVER in response:
1058                return False
1059
1060            # Don't return response directly since it may be empty string,
1061            # which causes poll_for_condition to retry.
1062            return _EMPTY_SENTINEL_VALUE if not response else response
1063
1064        try:
1065            response = bin_utils.poll_for_condition(
1066                    kickoff_call,
1067                    exception=bin_utils.TimeoutError(),
1068                    timeout=60,
1069                    sleep_interval=5)
1070            return '' if response is _EMPTY_SENTINEL_VALUE else response
1071        except bin_utils.TimeoutError:
1072            return ERR_MSG_FOR_DOWN_DEVSERVER
1073
1074
1075    @classmethod
1076    def download_file(cls, remote_file, local_file, timeout=None):
1077        """Download file from devserver.
1078
1079        The format of remote_file should be:
1080            http://devserver_ip:8082/static/board/...
1081
1082        @param remote_file: The URL of the file on devserver that need to be
1083            downloaded.
1084        @param local_file: The path of the file saved to local.
1085        @param timeout: The timeout seconds for this call.
1086        """
1087        server_name = get_hostname(remote_file)
1088        is_in_restricted_subnet = utils.get_restricted_subnet(
1089                server_name, utils.get_all_restricted_subnets())
1090
1091        if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER
1092                    or not is_in_restricted_subnet):
1093            response = super(ImageServerBase, cls).run_request(remote_file,
1094                                                               timeout=timeout)
1095            with open(local_file, 'wb') as out_log:
1096                shutil.copyfileobj(response, out_log)
1097        else:
1098            timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS * 60
1099            # SSH to the dev server and attach the local file as stdout.
1100            with open(local_file, 'wb') as out_log:
1101                ssh_cmd = [
1102                        'ssh', server_name,
1103                        'curl -s -S -f "%s"' % utils.sh_escape(remote_file)
1104                ]
1105                logging.debug("Running command %s", ssh_cmd)
1106                with open(os.devnull) as devnull:
1107                    cmd = subprocess.Popen(
1108                            ssh_cmd,
1109                            stdout=out_log,
1110                            stdin=devnull,
1111                            stderr=subprocess.PIPE,
1112                    )
1113
1114                    # Python 2.7 doesn't have Popen.wait(timeout), so start a
1115                    # timer and kill the ssh process if it takes too long.
1116                    def stop_process():
1117                        """Kills the subprocess after the timeout."""
1118                        cmd.kill()
1119                        logging.error("ssh call timed out after %s secs",
1120                                      timeout_seconds)
1121
1122                    t = Timer(timeout_seconds, stop_process)
1123                    try:
1124                        t.start()
1125                        cmd.wait()
1126                    finally:
1127                        t.cancel()
1128                    error_output = cmd.stderr.read()
1129                    if error_output:
1130                        logging.error("ssh call output: %s", error_output)
1131                    if cmd.returncode != 0:
1132                        c = metrics.Counter(
1133                                'chromeos/autotest/devserver/ssh_failure')
1134                        c.increment(fields={'dev_server': server_name})
1135                        raise DevServerException(
1136                                "ssh call failed with exit code %s",
1137                                cmd.returncode)
1138
1139
1140    def _poll_is_staged(self, **kwargs):
1141        """Polling devserver.is_staged until all artifacts are staged.
1142
1143        @param kwargs: keyword arguments to make is_staged devserver call.
1144
1145        @return: True if all artifacts are staged in devserver.
1146        """
1147        call = self.build_call('is_staged', **kwargs)
1148
1149        def all_staged():
1150            """Call devserver.is_staged rpc to check if all files are staged.
1151
1152            @return: True if all artifacts are staged in devserver. False
1153                     otherwise.
1154            @rasies DevServerException, the exception is a wrapper of all
1155                    exceptions that were raised when devserver tried to download
1156                    the artifacts. devserver raises an HTTPError or a CmdError
1157                    when an exception was raised in the code. Such exception
1158                    should be re-raised here to stop the caller from waiting.
1159                    If the call to devserver failed for connection issue, a
1160                    URLError exception is raised, and caller should retry the
1161                    call to avoid such network flakiness.
1162
1163            """
1164            try:
1165                result = self.run_call(call)
1166                logging.debug('whether artifact is staged: %r', result)
1167                return result == 'True'
1168            except urllib.error.HTTPError as e:
1169                error_markup = e.read()
1170                raise DevServerException(_strip_http_message(error_markup))
1171            except urllib.error.URLError as e:
1172                # Could be connection issue, retry it.
1173                # For example: <urlopen error [Errno 111] Connection refused>
1174                logging.error('URLError happens in is_stage: %r', e)
1175                return False
1176            except error.CmdError as e:
1177                # Retry if SSH failed to connect to the devserver.
1178                logging.warning('CmdError happens in is_stage: %r, will retry', e)
1179                return False
1180
1181        bin_utils.poll_for_condition(
1182                all_staged,
1183                exception=bin_utils.TimeoutError(),
1184                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
1185                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
1186
1187        return True
1188
1189
1190    def _call_and_wait(self, call_name, error_message,
1191                       expected_response=SUCCESS, **kwargs):
1192        """Helper method to make a urlopen call, and wait for artifacts staged.
1193
1194        @param call_name: name of devserver rpc call.
1195        @param error_message: Error message to be thrown if response does not
1196                              match expected_response.
1197        @param expected_response: Expected response from rpc, default to
1198                                  |Success|. If it's set to None, do not compare
1199                                  the actual response. Any response is consider
1200                                  to be good.
1201        @param kwargs: keyword arguments to make is_staged devserver call.
1202
1203        @return: The response from rpc.
1204        @raise DevServerException upon any return code that's expected_response.
1205
1206        """
1207        call = self.build_call(call_name, is_async=True, **kwargs)
1208        try:
1209            response = self.run_call(call)
1210            logging.debug('response for RPC: %r', response)
1211            if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response:
1212                logging.debug('Proxy error happens in RPC call, '
1213                              'will retry in 30 seconds')
1214                time.sleep(30)
1215                raise DevServerOverloadException()
1216        except six.moves.http_client.BadStatusLine as e:
1217            logging.error(e)
1218            raise DevServerException('Received Bad Status line, Devserver %s '
1219                                     'might have gone down while handling '
1220                                     'the call: %s' % (self.url(), call))
1221
1222        if expected_response and not response == expected_response:
1223            raise DevServerException(error_message)
1224
1225        # `os_type` is needed in build a devserver call, but not needed for
1226        # wait_for_artifacts_staged, since that method is implemented by
1227        # each ImageServerBase child class.
1228        if 'os_type' in kwargs:
1229            del kwargs['os_type']
1230        self.wait_for_artifacts_staged(**kwargs)
1231        return response
1232
1233
1234    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
1235        """Tell the devserver to download and stage |artifacts| from |image|
1236        specified by kwargs.
1237
1238        This is the main call point for staging any specific artifacts for a
1239        given build. To see the list of artifacts one can stage see:
1240
1241        ~src/platfrom/dev/artifact_info.py.
1242
1243        This is maintained along with the actual devserver code.
1244
1245        @param artifacts: A list of artifacts.
1246        @param files: A list of files to stage.
1247        @param archive_url: Optional parameter that has the archive_url to stage
1248                this artifact from. Default is specified in autotest config +
1249                image.
1250        @param kwargs: keyword arguments that specify the build information, to
1251                make stage devserver call.
1252
1253        @raise DevServerException upon any return code that's not HTTP OK.
1254        """
1255        if not archive_url:
1256            archive_url = os.path.join(
1257                    _get_storage_server_for_artifacts(artifacts), build)
1258
1259        artifacts_arg = ','.join(artifacts) if artifacts else ''
1260        files_arg = ','.join(files) if files else ''
1261        error_message = ("staging %s for %s failed;"
1262                         "HTTP OK not accompanied by 'Success'." %
1263                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
1264                          build))
1265
1266        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
1267                        (build, artifacts, files, archive_url))
1268        logging.info('Staging artifacts on devserver %s: %s',
1269                     self.url(), staging_info)
1270        success = False
1271        try:
1272            arguments = {'archive_url': archive_url,
1273                         'artifacts': artifacts_arg,
1274                         'files': files_arg}
1275            if kwargs:
1276                arguments.update(kwargs)
1277            f = {'artifacts': artifacts_arg,
1278                 'dev_server': self.resolved_hostname}
1279            with metrics.SecondsTimer(
1280                    'chromeos/autotest/devserver/stage_artifact_duration',
1281                    fields=f):
1282                self.call_and_wait(call_name='stage', error_message=error_message,
1283                                   **arguments)
1284            logging.info('Finished staging artifacts: %s', staging_info)
1285            success = True
1286        except (bin_utils.TimeoutError, error.TimeoutException):
1287            logging.error('stage_artifacts timed out: %s', staging_info)
1288            raise DevServerException(
1289                    'stage_artifacts timed out: %s' % staging_info)
1290        finally:
1291            f = {'success': success,
1292                 'artifacts': artifacts_arg,
1293                 'dev_server': self.resolved_hostname}
1294            metrics.Counter('chromeos/autotest/devserver/stage_artifact'
1295                            ).increment(fields=f)
1296
1297
1298    def call_and_wait(self, *args, **kwargs):
1299        """Helper method to make a urlopen call, and wait for artifacts staged.
1300
1301        This method needs to be overridden in the subclass to implement the
1302        logic to call _call_and_wait.
1303        """
1304        raise NotImplementedError
1305
1306
1307    def _trigger_download(self, build, artifacts, files, synchronous=True,
1308                          **kwargs_build_info):
1309        """Tell the devserver to download and stage image specified in
1310        kwargs_build_info.
1311
1312        Tells the devserver to fetch |image| from the image storage server
1313        named by _get_image_storage_server().
1314
1315        If |synchronous| is True, waits for the entire download to finish
1316        staging before returning. Otherwise only the artifacts necessary
1317        to start installing images onto DUT's will be staged before returning.
1318        A caller can then call finish_download to guarantee the rest of the
1319        artifacts have finished staging.
1320
1321        @param synchronous: if True, waits until all components of the image are
1322               staged before returning.
1323        @param kwargs_build_info: Dictionary of build information.
1324                For CrOS, it is None as build is the CrOS image name.
1325                For Android, it is {'target': target,
1326                                    'build_id': build_id,
1327                                    'branch': branch}
1328
1329        @raise DevServerException upon any return code that's not HTTP OK.
1330
1331        """
1332        if kwargs_build_info:
1333            archive_url = None
1334        else:
1335            archive_url = _get_image_storage_server() + build
1336        error_message = ("trigger_download for %s failed;"
1337                         "HTTP OK not accompanied by 'Success'." % build)
1338        kwargs = {'archive_url': archive_url,
1339                  'artifacts': artifacts,
1340                  'files': files,
1341                  'error_message': error_message}
1342        if kwargs_build_info:
1343            kwargs.update(kwargs_build_info)
1344
1345        logging.info('trigger_download starts for %s', build)
1346        try:
1347            response = self.call_and_wait(call_name='stage', **kwargs)
1348            logging.info('trigger_download finishes for %s', build)
1349        except (bin_utils.TimeoutError, error.TimeoutException):
1350            logging.error('trigger_download timed out for %s.', build)
1351            raise DevServerException(
1352                    'trigger_download timed out for %s.' % build)
1353        was_successful = response == SUCCESS
1354        if was_successful and synchronous:
1355            self._finish_download(build, artifacts, files, **kwargs_build_info)
1356
1357
1358    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1359        """Tell the devserver to finish staging image specified in
1360        kwargs_build_info.
1361
1362        If trigger_download is called with synchronous=False, it will return
1363        before all artifacts have been staged. This method contacts the
1364        devserver and blocks until all staging is completed and should be
1365        called after a call to trigger_download.
1366
1367        @param kwargs_build_info: Dictionary of build information.
1368                For CrOS, it is None as build is the CrOS image name.
1369                For Android, it is {'target': target,
1370                                    'build_id': build_id,
1371                                    'branch': branch}
1372
1373        @raise DevServerException upon any return code that's not HTTP OK.
1374        """
1375        archive_url = _get_image_storage_server() + build
1376        error_message = ("finish_download for %s failed;"
1377                         "HTTP OK not accompanied by 'Success'." % build)
1378        kwargs = {'archive_url': archive_url,
1379                  'artifacts': artifacts,
1380                  'files': files,
1381                  'error_message': error_message}
1382        if kwargs_build_info:
1383            kwargs.update(kwargs_build_info)
1384        try:
1385            self.call_and_wait(call_name='stage', **kwargs)
1386        except (bin_utils.TimeoutError, error.TimeoutException):
1387            logging.error('finish_download timed out for %s', build)
1388            raise DevServerException(
1389                    'finish_download timed out for %s.' % build)
1390
1391
1392    @remote_devserver_call()
1393    def locate_file(self, file_name, artifacts, build, build_info):
1394        """Locate a file with the given file_name on devserver.
1395
1396        This method calls devserver RPC `locate_file` to look up a file with
1397        the given file name inside specified build artifacts.
1398
1399        @param file_name: Name of the file to look for a file.
1400        @param artifacts: A list of artifact names to search for the file.
1401        @param build: Name of the build. For Android, it's None as build_info
1402                should be used.
1403        @param build_info: Dictionary of build information.
1404                For CrOS, it is None as build is the CrOS image name.
1405                For Android, it is {'target': target,
1406                                    'build_id': build_id,
1407                                    'branch': branch}
1408
1409        @return: A devserver url to the file.
1410        @raise DevServerException upon any return code that's not HTTP OK.
1411        """
1412        if not build and not build_info:
1413            raise DevServerException('You must specify build information to '
1414                                     'look for file %s in artifacts %s.' %
1415                                     (file_name, artifacts))
1416        kwargs = {'file_name': file_name,
1417                  'artifacts': artifacts}
1418        if build_info:
1419            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1420            kwargs.update(build_info)
1421            # Devserver treats Android and Brillo build in the same way as they
1422            # are both retrieved from Launch Control and have similar build
1423            # artifacts. Therefore, os_type for devserver calls is `android` for
1424            # both Android and Brillo builds.
1425            kwargs['os_type'] = 'android'
1426        else:
1427            build_path = build
1428            kwargs['build'] = build
1429        call = self.build_call('locate_file', is_async=False, **kwargs)
1430        try:
1431            file_path = self.run_call(call)
1432            return os.path.join(self.url(), 'static', build_path, file_path)
1433        except six.moves.http_client.BadStatusLine as e:
1434            logging.error(e)
1435            raise DevServerException('Received Bad Status line, Devserver %s '
1436                                     'might have gone down while handling '
1437                                     'the call: %s' % (self.url(), call))
1438
1439
1440    @remote_devserver_call()
1441    def list_control_files(self, build, suite_name=''):
1442        """Ask the devserver to list all control files for |build|.
1443
1444        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1445                      whose control files the caller wants listed.
1446        @param suite_name: The name of the suite for which we require control
1447                           files.
1448        @return None on failure, or a list of control file paths
1449                (e.g. server/site_tests/autoupdate/control)
1450        @raise DevServerException upon any return code that's not HTTP OK.
1451        """
1452        build = self.translate(build)
1453        call = self.build_call('controlfiles', build=build,
1454                               suite_name=suite_name)
1455        return self.run_call(call, readline=True)
1456
1457
1458    @remote_devserver_call()
1459    def get_control_file(self, build, control_path):
1460        """Ask the devserver for the contents of a control file.
1461
1462        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1463                      whose control file the caller wants to fetch.
1464        @param control_path: The file to fetch
1465                             (e.g. server/site_tests/autoupdate/control)
1466        @return The contents of the desired file.
1467        @raise DevServerException upon any return code that's not HTTP OK.
1468        """
1469        build = self.translate(build)
1470        call = self.build_call('controlfiles', build=build,
1471                               control_path=control_path)
1472        return self.run_call(call)
1473
1474
1475    @remote_devserver_call()
1476    def list_suite_controls(self, build, suite_name=''):
1477        """Ask the devserver to list contents of all control files for |build|.
1478
1479        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1480                      whose control files' contents the caller wants returned.
1481        @param suite_name: The name of the suite for which we require control
1482                           files.
1483        @return None on failure, or a dict of contents of all control files
1484            (e.g. {'path1': "#Copyright controls ***", ...,
1485                pathX': "#Copyright controls ***"}
1486        @raise DevServerException upon any return code that's not HTTP OK.
1487        """
1488        build = self.translate(build)
1489        call = self.build_call('list_suite_controls', build=build,
1490                               suite_name=suite_name)
1491        return json.load(six.StringIO(self.run_call(call)))
1492
1493
class ImageServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to CrOS images.

    The calls to devserver to stage artifacts, including stage and download,
    are made in async mode. That is, when the caller makes an RPC |stage| to
    request devserver to stage certain artifacts, devserver handles the call
    and starts staging artifacts in a new thread, and returns |Success|
    without waiting for staging to complete. When the caller receives the
    message |Success|, it polls devserver's is_staged call until all artifacts
    are staged.
    This mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and
    cherrypy starts with a fixed number of threads that handle devserver rpc.
    """

    class ArtifactUrls(object):
        """A container for URLs of staged artifacts.

        Attributes:
            full_payload: URL for downloading a staged full release update
            mton_payload: URL for downloading a staged M-to-N release update
            nton_payload: URL for downloading a staged N-to-N release update

        """
        def __init__(self, full_payload=None, mton_payload=None,
                     nton_payload=None):
            self.full_payload = full_payload
            self.mton_payload = mton_payload
            self.nton_payload = nton_payload


    def wait_for_artifacts_staged(self,
                                  archive_url,
                                  artifacts='',
                                  files='',
                                  **kwargs):
        """Poll devserver.is_staged until all artifacts are staged.

        @param archive_url: Google Storage URL for the build.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param kwargs: Extra keyword arguments are accepted so callers may
                       pass a superset of arguments, but they are ignored:
                       only archive_url, artifacts and files are forwarded
                       to the is_staged poll.
        @return: True if all artifacts are staged in devserver.
        """
        # Build a fresh dict rather than rebinding |kwargs| so it is obvious
        # that nothing beyond these three values reaches _poll_is_staged.
        poll_args = {'archive_url': archive_url,
                     'artifacts': artifacts,
                     'files': files}
        return self._poll_is_staged(**poll_args)


    @remote_devserver_call()
    def call_and_wait(self,
                      call_name,
                      archive_url,
                      artifacts,
                      files,
                      error_message,
                      expected_response=SUCCESS,
                      clean=False):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param archive_url: Google Storage URL for the build.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.
        @param clean: Force re-loading artifacts/files from cloud, ignoring
                      cached version.

        @return: The response from rpc.
        @raise DevServerException upon any response that's not
               expected_response.

        """
        kwargs = {
                'archive_url': archive_url,
                'artifacts': artifacts,
                'files': files,
                'clean': clean
        }
        return self._call_and_wait(call_name, error_message,
                                   expected_response, **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, image=None, artifacts=None, files='',
                        archive_url=None, **kwargs):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param image: the image to fetch and stage. May be None, in which case
                it is handed to the staging helper untranslated.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.
        @param kwargs: keyword arguments that specify the build information, to
                make stage devserver call.

        @raise DevServerException if neither artifacts nor files is given, or
                upon any return code that's not HTTP OK.
        """
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        # |image| defaults to None, and translate() runs a regex over its
        # argument, which raises TypeError on None. Only translate real names.
        if image:
            image = self.translate(image)
        self._stage_artifacts(image, artifacts, files, archive_url, **kwargs)


    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
    def list_image_dir(self, image):
        """List the contents of the image stage directory, on the devserver.

        Each line of the devserver response is written to the info log;
        nothing is returned.

        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        image = self.translate(image)
        logging.info('Requesting contents from devserver %s for image %s',
                     self.url(), image)
        archive_url = _get_storage_server_for_artifacts() + image
        call = self.build_call('list_image_dir', archive_url=archive_url)
        response = self.run_call(call, readline=True)
        for line in response:
            logging.info(line)


    def trigger_download(self, image, synchronous=True):
        """Tell the devserver to download and stage |image|.

        Tells the devserver to fetch |image| from the image storage server
        named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param image: the image to fetch and stage.
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        image = self.translate(image)
        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
        self._trigger_download(image, artifacts, files='',
                               synchronous=synchronous)


    @remote_devserver_call()
    def setup_telemetry(self, build):
        """Tell the devserver to setup telemetry for this build.

        The devserver will stage autotest and then extract the required files
        for telemetry.

        @param build: the build to setup telemetry for.

        @returns path on the devserver that telemetry is installed to.
        @raise DevServerException if the devserver answers with a bad status
                line, which suggests it went down while handling the call.
        """
        build = self.translate(build)
        archive_url = _get_image_storage_server() + build
        call = self.build_call('setup_telemetry', archive_url=archive_url)
        try:
            response = self.run_call(call)
        except six.moves.http_client.BadStatusLine as e:
            logging.error(e)
            raise DevServerException('Received Bad Status line, Devserver %s '
                                     'might have gone down while handling '
                                     'the call: %s' % (self.url(), call))
        return response


    def finish_download(self, image):
        """Tell the devserver to finish staging |image|.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param image: the image to fetch and stage.
        @raise DevServerException upon any return code that's not HTTP OK.
        """
        image = self.translate(image)
        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
        self._finish_download(image, artifacts, files='')


    def get_update_url(self, image):
        """Returns the url that should be passed to the updater.

        The url is built from the CROS/image_url_pattern config value,
        filled in with this devserver's url and the translated image name.

        @param image: the image that was fetched.
        """
        image = self.translate(image)
        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
                                              type=str)
        return url_pattern % (self.url(), image)


    def get_staged_file_url(self, filename, image):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the staged file.
        @param image: the image that was fetched.

        @return The image url joined with |filename| by a '/'.
        """
        return '/'.join([self._get_image_url(image), filename])


    def get_test_image_url(self, image):
        """Returns a URL to a staged test image.

        @param image: the image that was fetched.

        @return A fully qualified URL that can be used for downloading the
                image.

        """
        return self._get_image_url(image) + '/chromiumos_test_image.bin'


    def get_recovery_image_url(self, image):
        """Returns a URL to a staged recovery image.

        @param image: the image that was fetched.

        @return A fully qualified URL that can be used for downloading the
                image.

        """
        return self._get_image_url(image) + '/recovery_image.bin'


    @remote_devserver_call()
    def get_dependencies_file(self, build):
        """Ask the dev server for the contents of the suite dependencies file.

        Ask the dev server at |self._dev_server| for the contents of the
        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
        for |build|.

        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
                      whose dependencies the caller is interested in.
        @return The contents of the dependencies file, which should eval to
                a dict of dicts, as per bin_utils/suite_preprocessor.py.
        @raise DevServerException upon any return code that's not HTTP OK.
        """
        build = self.translate(build)
        call = self.build_call('controlfiles',
                               build=build, control_path=DEPENDENCIES_FILE)
        return self.run_call(call)


    @remote_devserver_call()
    def get_latest_build_in_gs(self, board):
        """Ask the devservers for the latest official build in Google Storage.

        @param board: The board for which we want the latest official build.
        @return A string of the returned build, e.g. rambi-release/R37-5868.0.0
                (the directory part of the image name the devserver returns).
        @raise DevServerException upon any return code that's not HTTP OK.
        """
        call = self.build_call(
                'xbuddy_translate/remote/%s/latest-official' % board,
                image_dir=_get_image_storage_server())
        image_name = self.run_call(call)
        return os.path.dirname(image_name)


    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [builder]/LATEST, return the latest
        build in Google Storage otherwise return the build name as is.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
        if not match:
            return build_name
        # Group 1 is everything before the last '-<word>' of the builder name;
        # that is what gets looked up in Google Storage.
        translated_build = self.get_latest_build_in_gs(match.group(1))
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build


    @classmethod
    @remote_devserver_call()
    def get_latest_build(cls, target, milestone=''):
        """Ask all the devservers for the latest build for a given target.

        @param target: The build target, typically a combination of the board
                       and the type of build e.g. x86-mario-release.
        @param milestone:  For latest build set to '', for builds only in a
                           specific milestone set to a str of format Rxx
                           (e.g. R16). Default: ''. Since we are dealing with a
                           webserver sending an empty string, '', ensures that
                           the variable in the URL is ignored as if it was set
                           to None.
        @return A string of the returned build e.g. R20-2226.0.0.
        @raise DevServerException upon any return code that's not HTTP OK.
        """
        calls = cls.build_all_calls('latestbuild', target=target,
                                    milestone=milestone)
        latest_builds = [cls.run_call(call) for call in calls]
        # Compare as loose version strings so the newest build wins.
        return max(latest_builds, key=version.LooseVersion)


    def _read_json_response_from_devserver(self, response):
        """Reads the json response from the devserver.

        This is extracted to its own function so that it can be easily mocked.

        @param response: the response for a devserver.

        @return The decoded JSON value.
        @raise DevServerException if |response| is not valid JSON.
        """
        try:
            return json.loads(response)
        except ValueError as e:
            logging.debug('Failed to load json response: %s', response)
            raise DevServerException(e)


    def _check_error_message(self, error_patterns_to_check, error_msg):
        """Detect whether specific error pattern exist in error message.

        @param error_patterns_to_check: the error patterns to check
        @param error_msg: the error message which may include any error
                          pattern.

        @return A boolean variable, True if error_msg contains any error
            pattern in error_patterns_to_check, False otherwise.
        """
        return any(err in error_msg for err in error_patterns_to_check)
1840
1841
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download,
    are made in async mode. That is, when the caller makes an RPC |stage| to
    request devserver to stage certain artifacts, devserver handles the call
    and starts staging artifacts in a new thread, and returns |Success|
    without waiting for staging to complete. When the caller receives the
    message |Success|, it polls devserver's is_staged call until all artifacts
    are staged.
    This mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and
    cherrypy starts with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Poll devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only forwarded
                            to the poll when it is set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the CrOS build. Only
                            forwarded to the rpc when it is set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.

        @return: The response from rpc.
        @raise DevServerException upon any response that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id. Only parsed when none of
                      target, build_id and branch is given.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException if the build info is incomplete, if nothing
                is requested to be staged, or upon any return code that's not
                HTTP OK.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')

        # All three values are known to be truthy at this point, so no further
        # validation of the build info is needed.
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo). Note that this
               parameter shadows the os module inside this method.
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo). Note that this
               parameter shadows the os module inside this method.

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        # The board is the part of the target before the first '-'.
        board = target.split('-')[0]
        # Pass |os| through, as trigger_download does, so that both calls
        # resolve the same artifact list for a given build.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.
        The LATEST marker is matched case-insensitively.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
2073
2074
def _is_load_healthy(load):
    """Tell whether a devserver's load stays within the allowed limits.

    The CPU load is checked first, then the network IO. The first value found
    above its DevServer maximum is logged at debug level and makes the load
    unhealthy.

    @param load: The devserver's load stats to check.

    @return: False if the CPU load or the network IO is above its threshold,
             True otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    if cpu_load > DevServer.MAX_CPU_LOAD:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, DevServer.MAX_CPU_LOAD)
        return False

    network_io = load[DevServer.NETWORK_IO]
    if network_io > DevServer.MAX_NETWORK_IO:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io,
                      DevServer.MAX_NETWORK_IO)
        return False

    return True
2097
2098
def _compare_load(devserver1, devserver2):
    """Comparator function to compare load between two devservers.

    Only the DevServer.DISK_IO stat of each devserver is compared.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: -1 if the load of `devserver1` is less than the load of
             `devserver2`, 1 if it is greater, and 0 if both are equal.

    """
    disk_io1 = devserver1[DevServer.DISK_IO]
    disk_io2 = devserver2[DevServer.DISK_IO]
    # Compare directly instead of truncating the difference with int(): a
    # difference smaller than 1 would be rounded to 0 and reported as equal.
    return (disk_io1 > disk_io2) - (disk_io1 < disk_io2)
2110
2111
def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Find the first restricted subnet that contains a given host IP.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each given as a
                               (subnet_ip, mask_bits) pair.

    @return: a (subnet_ip, mask_bits) tuple for the first subnet that
             utils.is_in_same_subnet reports as containing host_ip. If no
             subnet matches, return (None, None).
    """
    matching_subnets = (
            (subnet_ip, mask_bits)
            for subnet_ip, mask_bits in restricted_subnets
            if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits))
    # The generator is lazy, so the search stops at the first match.
    return next(matching_subnets, (None, None))
2127
2128
def _get_subnet_group_for_host_ip(host_ip, all_subnets=()):
    """Find the subnet group that a given host IP belongs to.

    All subnets in the group are reachable from the input host ip.

    @param host_ip: the IP of a DUT.
    @param all_subnets: A two level list of subnets including singleton
                        lists of a restricted subnet and p2p subnets.

    @return: the first group in all_subnets that has a subnet containing
             host_ip, i.e. a list of (subnet_ip, mask_bits) tuples. If no
             group matches, return [].
    """
    for candidate_group in all_subnets:
        matched_subnet_ip = _get_subnet_for_host_ip(
                host_ip, restricted_subnets=candidate_group)[0]
        if not matched_subnet_ip:
            continue
        return candidate_group
    return []
2147
2148
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Get the devserver with the least load.

    Iterate through all devservers and get the one with least load.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load. None if no healthy
             devserver is available and retry is not allowed, if no load
             stats could be retrieved, or if no devserver has a healthy load.

    """
    # Imported at function scope: only needed to adapt the cmp-style
    # comparator used for sorting below.
    import functools

    logging.debug('Get the least loaded %r', devserver_type)
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # If no healthy devservers available and can_retry is False, return None.
    # Otherwise, relax the constraint on hostname, allow all devservers to be
    # available.
    if not devserver_type.get_healthy_devserver('', devservers):
        if not can_retry:
            return None
        else:
            devservers, _ = devserver_type.get_available_devservers()

    # get_devserver_load call needs to be made in a new process to allow force
    # timeout using signal.
    output = multiprocessing.Queue()
    processes = []
    for devserver in devservers:
        processes.append(multiprocessing.Process(
                target=devserver_type.get_devserver_load_wrapper,
                args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output)))

    for p in processes:
        p.start()
    for p in processes:
        # The timeout for the process commands aren't reliable.  Add
        # some extra time to the timeout for potential overhead in the
        # subprocesses.  crbug.com/913695
        p.join(TIMEOUT_GET_DEVSERVER_LOAD + 10)
    # Read queue before killing processes to avoid corrupting the queue.
    # One result is read for every process that has already exited.
    loads = [output.get() for p in processes if not p.is_alive()]
    for p in processes:
        if p.is_alive():
            p.terminate()
    # Filter out any load failed to be retrieved or does not support load check.
    loads = [load for load in loads if load and DevServer.CPU_LOAD in load and
             DevServer.is_free_disk_ok(load) and
             DevServer.is_apache_client_count_ok(load)]
    if not loads:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None
    loads = [load for load in loads if _is_load_healthy(load)]
    if not loads:
        logging.error('No devserver has the capacity to be selected.')
        return None
    # sorted() accepts `cmp=` only in Python 2 and raises TypeError for it in
    # Python 3. Wrapping the comparator with cmp_to_key keeps the same
    # ordering and works on both Python 2.7 and Python 3.
    loads = sorted(loads, key=functools.cmp_to_key(_compare_load))
    return loads[0]['devserver']
2217
2218
def resolve(build, hostname=None, ban_list=None):
    """Resolve a devserver that can be used for the given build and hostname.

    Launch Control builds are resolved through AndroidBuildServer; all other
    builds are resolved through ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is used for. Default
            is None, which means devserver is not restricted by the network
            location of the host.
    @param ban_list: The ban_list of devservers that shouldn't be chosen. It
            is only forwarded when the build is not a Launch Control build.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if not utils.is_launch_control_build(build):
        return ImageServer.resolve(build, hostname, ban_list=ban_list)
    return AndroidBuildServer.resolve(build, hostname)
2236