# Lint as: python2, python3
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from distutils import version
import json
import logging
import multiprocessing
import os
import re
import shutil
import subprocess
from threading import Timer
import six
from six.moves import urllib
import six.moves.html_parser
import six.moves.http_client
import six.moves.urllib.parse
import time

from autotest_lib.client.bin import utils as bin_utils
from autotest_lib.client.common_lib import android_utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import seven
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros import retry


try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    # Fall back to the mock so metric calls in this module stay callable.
    metrics = utils.metrics_mock


CONFIG = global_config.global_config
# This file is generated at build time and specifies, per suite and per test,
# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'
# Number of seconds for caller to poll devserver's is_staged call to check if
# artifacts are staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5
# Artifacts that should be staged when client calls devserver RPC to stage an
# image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
# Artifacts that should be staged when client calls devserver RPC to stage an
# image with autotest artifact.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
                                                   'control_files,stateful,'
                                                   'autotest_packages')
# Whether the devserver health checks are skipped, as configured by
# 'skip_devserver_health_check' in the CROS section of the global config.
SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)
# Number of seconds for the call to get devserver load to time out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0

# Android artifact path in devserver
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Return value from a devserver RPC indicating the call succeeded.
SUCCESS = 'Success'

# The timeout minutes for a given devserver ssh call.
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Error message for invalid devserver response.
ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'

# Error message for a devserver call that timed out.
ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'

# The timeout minutes for waiting a devserver staging.
DEVSERVER_IS_STAGING_RETRY_MIN = 100

# Provision error patterns.
# People who see this should know that they shouldn't change these
# classification strings. These strings are used for monitoring provision
# failures. Any changes may mess up the stats.
_EXCEPTION_PATTERNS = [
        # Raised when devserver portfile does not exist on host.
        (r".*Devserver portfile does not exist!.*$",
         '(1) Devserver portfile does not exist on host'),
        # Raised when devserver cannot copy packages to host.
        (r".*Could not copy .* to device.*$",
         '(2) Cannot copy packages to host'),
        # Raised when devserver fails to run specific commands on host.
        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
         '(3) Fail to run specific command on host'),
        # Raised when new build fails to boot on the host.
        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
         '(4) Build failed to boot on host'),
        # Raised when the auto-update process is timed out.
        (r'.*The CrOS auto-update process is timed out, '
         'thus will be terminated.*$',
         '(5) Auto-update is timed out'),
        # Raised when the host is not pingable.
        (r".*DeviceNotPingableError.*$",
         '(6) Host is not pingable during auto-update'),
        # Raised when hosts have unexpected status after rootfs update.
        (r'.*Update failed with unexpected update status: '
         'UPDATE_STATUS_IDLE.*$',
         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
         'update'),
        # Raised when devserver returns non-json response to shard/drone.
        (r'.*No JSON object could be decoded.*$',
         '(8) Devserver returned non-json object'),
        # Raised when devserver loses host's ssh connection
        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
         "(9) Devserver lost host's ssh connection"),
        # Raised when error happens in writing files to host
        (r'.*Write failed\: Broken pipe.*$',
         "(10) Broken pipe while writing or connecting to host")]

PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'prefer_local_devserver', type=bool, default=False)

ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
        default=False)

# Default number of mask bits used when matching a DUT to devservers in the
# same subnet.
DEFAULT_SUBNET_MASKBIT = 19


class DevServerException(Exception):
    """Raised when the dev server returns a non-200 HTTP response."""
    pass


class DevServerOverloadException(Exception):
    """Raised when the dev server returns a 502 HTTP response."""
    pass


class DevServerFailToLocateException(Exception):
    """Raised when fail to locate any devserver."""
    pass


class MarkupStripper(six.moves.html_parser.HTMLParser):
    """HTML parser that strips HTML tags, coded characters like &amp;

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal data structure.
    """
    def __init__(self):
        # Run the base-class initializer (which also performs reset()) so
        # that every attribute the parser relies on exists. It is invoked by
        # class name rather than super() to stay usable on Python 2 as well.
        six.moves.html_parser.HTMLParser.__init__(self)
        self.fed = []
        self.convert_charrefs = True


    def handle_data(self, d):
        """Consume content of text nodes, store it away.

        @param d: The text content of a node.
        """
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data."""
        return ''.join(self.fed)


def _strip_http_message(message):
    """Strip the HTTP markup from an HTTP message.

    @param message: A string (text or bytes) returned by an HTTP call.

    @return: A string with the HTTP markup being stripped.
    """
    parser = MarkupStripper()
    try:
        parser.feed(seven.ensure_text(message, 'utf_32'))
    except UnicodeDecodeError:
        # On Python 3 the parser only accepts text; feeding it the raw bytes
        # would raise TypeError, so decode leniently before falling back.
        if six.PY3 and isinstance(message, bytes):
            message = message.decode('utf-8', 'replace')
        parser.feed(message)
    return parser.get_data()


def _get_image_storage_server():
    """Get the url of the image storage server from the global config.

    @return: The configured 'image_storage_server' value, with a trailing
             '/' appended if it does not end with one.
    """
    image_path = CONFIG.get_config_value('CROS',
                                         'image_storage_server',
                                         type=str)
    # see b/203531740; this forces a trailing / if not there yet.
    return os.path.join(image_path, '')


def _get_canary_channel_server():
    """
    Get the url of the canary-channel server,
    eg: gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The url to the canary channel server.
    """
    image_path = CONFIG.get_config_value('CROS',
                                         'canary_channel_server',
                                         type=str)
    # see b/203531740; this forces a trailing / if not there yet.
    return os.path.join(image_path, '')


def _get_storage_server_for_artifacts(artifacts=None):
    """Gets the appropriate storage server for the given artifacts.

    @param artifacts: A list of artifacts we need to stage.
    @return: The address of the storage server that has these artifacts.
             The default image storage server if no artifacts are specified.
    """
    factory_artifact = CONFIG.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if artifacts and factory_artifact and factory_artifact in artifacts:
        return _get_canary_channel_server()
    return _get_image_storage_server()


def _gs_or_local_archive_url_args(archive_url):
    """Infer the devserver call arguments to use with the given archive_url.

    @param archive_url: The archive url to include in the devserver RPC. This
            can either be a GS path or a local path.
    @return: A dict of arguments to include in the devserver call.
    """
    if not archive_url:
        return {}
    elif archive_url.startswith('gs://'):
        return {'archive_url': archive_url}
    else:
        # For a local path, we direct the devserver to move the files while
        # staging. This is the fastest way to stage local files, but deletes the
        # files from the source. This is OK because the files are available on
        # the devserver once staged.
        return {
                'local_path': archive_url,
                'delete_source': True,
        }


def _reverse_lookup_from_config(address):
    """Look up hostname for the given IP address.

    This uses the hostname-address map from the config file.

    If multiple hostnames map to the same IP address, the first one
    defined in the configuration file takes precedence.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    for hostname, addr in six.iteritems(_get_hostname_addr_map()):
        if addr == address:
            return hostname
    return address


def _get_hostname_addr_map():
    """Get hostname address mapping from config.

    @return: dict mapping server hostnames to addresses
    """
    return CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')


def _get_dev_server_list():
    """Get the list of devserver urls ('dev_server') from the config."""
    return CONFIG.get_config_value('CROS', 'dev_server', type=list, default=[])


def _get_crash_server_list():
    """Get the list of crash server urls ('crash_server') from the config."""
    return CONFIG.get_config_value('CROS', 'crash_server', type=list,
                                   default=[])


def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """A decorator to use with remote devserver calls.

    This decorator converts urllib HTTPErrors into DevServerExceptions
    with any embedded error info converted into plain text. The method
    retries on urllib URLError, error.CmdError or DevServerOverloadException
    to avoid devserver flakiness.

    @param timeout_min: Retry timeout in minutes, passed to retry.retry.
    @param exception_to_raise: Exception class passed to retry.retry as
                               `exception_to_raise`.

    @return: A decorator to apply to the method making the devserver call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        # Some callables have no __name__; the label is then None.
        label = getattr(method, '__name__', None)
        def metrics_wrapper(*args, **kwargs):
            @retry.retry((urllib.error.URLError, error.CmdError,
                          DevServerOverloadException),
                         timeout_min=timeout_min,
                         exception_to_raise=exception_to_raise,
                         label=label)
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib.error.HTTPError as e:
                    error_markup = e.read()
                    raise DevServerException(_strip_http_message(error_markup))

            try:
                return wrapper()
            except Exception as e:
                if ERR_MSG_FOR_TIMED_OUT_CALL in str(e):
                    # Work out which devserver the timed-out call targeted:
                    # either the bound DevServer instance or an explicit
                    # 'devserver' keyword argument.
                    dev_server = None
                    if args and isinstance(args[0], DevServer):
                        dev_server = args[0].hostname
                    elif 'devserver' in kwargs:
                        dev_server = get_hostname(kwargs['devserver'])

                    logging.debug('RPC call %s has timed out on devserver %s.',
                                  label, dev_server)
                    c = metrics.Counter(
                            'chromeos/autotest/devserver/call_timeout')
                    # NOTE(review): the 'healthy' field carries the RPC label,
                    # not a health flag. Left as-is because the field name is
                    # part of the emitted metric; confirm before renaming.
                    c.increment(fields={'dev_server': dev_server,
                                        'healthy': label})

                raise

        return metrics_wrapper

    return inner_decorator


328 329 330def get_hostname(url): 331 """Get the hostname portion of a URL 332 333 schema://hostname:port/path 334 335 @param url: a Url string 336 @return: a hostname string 337 """ 338 return six.moves.urllib.parse.urlparse(url).hostname 339 340 341def get_resolved_hostname(url): 342 """Get the symbolic hostname from url. 343 344 If the given `url` uses a numeric IP address, try and find a 345 symbolic name from the hostname map in the config file. 346 347 @param url The URL with which to perform the conversion/lookup. 348 """ 349 return _reverse_lookup_from_config(get_hostname(url)) 350 351 352class DevServer(object): 353 """Base class for all DevServer-like server stubs. 354 355 This is the base class for interacting with all Dev Server-like servers. 356 A caller should instantiate a sub-class of DevServer with: 357 358 host = SubClassServer.resolve(build) 359 server = SubClassServer(host) 360 """ 361 _MIN_FREE_DISK_SPACE_GB = 20 362 _MAX_APACHE_CLIENT_COUNT = 75 363 # Threshold for the CPU load percentage for a devserver to be selected. 364 MAX_CPU_LOAD = 80.0 365 # Threshold for the network IO, set to 80MB/s 366 MAX_NETWORK_IO = 1024 * 1024 * 80 367 DISK_IO = 'disk_total_bytes_per_second' 368 NETWORK_IO = 'network_total_bytes_per_second' 369 CPU_LOAD = 'cpu_percent' 370 FREE_DISK = 'free_disk' 371 AU_PROCESS = 'au_process_count' 372 STAGING_THREAD_COUNT = 'staging_thread_count' 373 APACHE_CLIENT_COUNT = 'apache_client_count' 374 375 376 def __init__(self, devserver): 377 self._devserver = devserver 378 379 380 def url(self): 381 """Returns the url for this devserver.""" 382 return self._devserver 383 384 385 @property 386 def hostname(self): 387 """Return devserver hostname parsed from the devserver URL. 388 389 Note that this is likely parsed from the devserver URL from 390 shadow_config.ini, meaning that the "hostname" part of the 391 devserver URL is actually an IP address. 
392 393 @return hostname string 394 """ 395 return get_hostname(self.url()) 396 397 398 @property 399 def resolved_hostname(self): 400 """Return devserver hostname, resolved from its IP address. 401 402 Unlike the hostname property, this property attempts to look up 403 the proper hostname from the devserver IP address. If lookup 404 fails, then fall back to whatever the hostname property would 405 have returned. 406 407 @return hostname string 408 """ 409 return _reverse_lookup_from_config(self.hostname) 410 411 412 @staticmethod 413 def get_server_url(url): 414 """Get the devserver url from a repo url, which includes build info. 415 416 @param url: A job repo url. 417 418 @return A devserver url, e.g., http://127.0.0.10:8080 419 """ 420 res = six.moves.urllib.parse.urlparse(url) 421 if res.netloc: 422 return res.scheme + '://' + res.netloc 423 424 425 @classmethod 426 def get_devserver_load_wrapper(cls, devserver, timeout_sec, output): 427 """A wrapper function to call get_devserver_load in parallel. 428 429 @param devserver: url of the devserver. 430 @param timeout_sec: Number of seconds before time out the devserver 431 call. 432 @param output: An output queue to save results to. 433 """ 434 load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0) 435 if load: 436 load['devserver'] = devserver 437 output.put(load) 438 439 440 @classmethod 441 def get_devserver_load(cls, devserver, 442 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 443 """Returns True if the |devserver| is healthy to stage build. 444 445 @param devserver: url of the devserver. 446 @param timeout_min: How long to wait in minutes before deciding the 447 the devserver is not up (float). 448 449 @return: A dictionary of the devserver's load. 
450 451 """ 452 call = cls._build_call(devserver, 'check_health') 453 @remote_devserver_call(timeout_min=timeout_min) 454 def get_load(devserver=devserver): 455 """Inner method that makes the call.""" 456 return cls.run_call(call, timeout=timeout_min*60) 457 458 try: 459 return json.load(six.StringIO(get_load(devserver=devserver))) 460 except Exception as e: 461 logging.error('Devserver call failed: "%s", timeout: %s seconds,' 462 ' Error: %s', call, timeout_min * 60, e) 463 464 465 @classmethod 466 def is_free_disk_ok(cls, load): 467 """Check if a devserver has enough free disk. 468 469 @param load: A dict of the load of the devserver. 470 471 @return: True if the devserver has enough free disk or disk check is 472 skipped in global config. 473 474 """ 475 if SKIP_DEVSERVER_HEALTH_CHECK: 476 logging.debug('devserver health check is skipped.') 477 elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB: 478 return False 479 480 return True 481 482 483 @classmethod 484 def is_apache_client_count_ok(cls, load): 485 """Check if a devserver has enough Apache connections available. 486 487 Apache server by default has maximum of 150 concurrent connections. If 488 a devserver has too many live connections, it likely indicates the 489 server is busy handling many long running download requests, e.g., 490 downloading stateful partitions. It is better not to add more requests 491 to it. 492 493 @param load: A dict of the load of the devserver. 494 495 @return: True if the devserver has enough Apache connections available, 496 or disk check is skipped in global config. 
497 498 """ 499 if SKIP_DEVSERVER_HEALTH_CHECK: 500 logging.debug('devserver health check is skipped.') 501 elif cls.APACHE_CLIENT_COUNT not in load: 502 logging.debug('Apache client count is not collected from devserver.') 503 elif (load[cls.APACHE_CLIENT_COUNT] > 504 cls._MAX_APACHE_CLIENT_COUNT): 505 return False 506 507 return True 508 509 510 @classmethod 511 def devserver_healthy(cls, devserver, 512 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 513 """Returns True if the |devserver| is healthy to stage build. 514 515 @param devserver: url of the devserver. 516 @param timeout_min: How long to wait in minutes before deciding the 517 the devserver is not up (float). 518 519 @return: True if devserver is healthy. Return False otherwise. 520 521 """ 522 c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy') 523 reason = '' 524 healthy = False 525 load = cls.get_devserver_load(devserver, timeout_min=timeout_min) 526 try: 527 if not load: 528 # Failed to get the load of devserver. 529 reason = '(1) Failed to get load.' 530 return False 531 532 apache_ok = cls.is_apache_client_count_ok(load) 533 if not apache_ok: 534 reason = '(2) Apache client count too high.' 535 logging.error('Devserver check_health failed. Live Apache client ' 536 'count is too high: %d.', 537 load[cls.APACHE_CLIENT_COUNT]) 538 return False 539 540 disk_ok = cls.is_free_disk_ok(load) 541 if not disk_ok: 542 reason = '(3) Disk space too low.' 543 logging.error('Devserver check_health failed. Free disk space is ' 544 'low. Only %dGB is available.', 545 load[cls.FREE_DISK]) 546 healthy = bool(disk_ok) 547 return disk_ok 548 finally: 549 c.increment(fields={'dev_server': cls(devserver).resolved_hostname, 550 'healthy': healthy, 551 'reason': reason}) 552 # Monitor how many AU processes the devserver is currently running. 
553 if load is not None and load.get(DevServer.AU_PROCESS): 554 c_au = metrics.Gauge( 555 'chromeos/autotest/devserver/devserver_au_count') 556 c_au.set( 557 load.get(DevServer.AU_PROCESS), 558 fields={'dev_server': cls(devserver).resolved_hostname}) 559 560 561 @staticmethod 562 def _build_call(host, method, **kwargs): 563 """Build a URL to |host| that calls |method|, passing |kwargs|. 564 565 Builds a URL that calls |method| on the dev server defined by |host|, 566 passing a set of key/value pairs built from the dict |kwargs|. 567 568 @param host: a string that is the host basename e.g. http://server:90. 569 @param method: the dev server method to call. 570 @param kwargs: a dict mapping arg names to arg values. 571 @return the URL string. 572 """ 573 # If the archive_url is a local path, the args expected by the devserver 574 # are a little different. 575 archive_url_args = _gs_or_local_archive_url_args( 576 kwargs.pop('archive_url', None)) 577 kwargs.update(archive_url_args) 578 if 'is_async' in kwargs: 579 f = kwargs.pop('is_async') 580 kwargs['async'] = f 581 argstr = '&'.join(["%s=%s" % x for x in six.iteritems(kwargs)]) 582 return "%(host)s/%(method)s?%(argstr)s" % dict( 583 host=host, method=method, argstr=argstr) 584 585 586 def build_call(self, method, **kwargs): 587 """Builds a devserver RPC string that is used by 'run_call()'. 588 589 @param method: remote devserver method to call. 590 """ 591 return self._build_call(self._devserver, method, **kwargs) 592 593 594 @classmethod 595 def build_all_calls(cls, method, **kwargs): 596 """Builds a list of URLs that makes RPC calls on all devservers. 597 598 Build a URL that calls |method| on the dev server, passing a set 599 of key/value pairs built from the dict |kwargs|. 600 601 @param method: the dev server method to call. 602 @param kwargs: a dict mapping arg names to arg values 603 604 @return the URL string 605 """ 606 calls = [] 607 # Note we use cls.servers as servers is class specific. 
608 for server in cls.servers(): 609 if cls.devserver_healthy(server): 610 calls.append(cls._build_call(server, method, **kwargs)) 611 612 return calls 613 614 615 @classmethod 616 def run_request(cls, call, timeout=None): 617 """Invoke a given devserver call using urllib.open. 618 619 Open the URL with HTTP, and return the text of the response. Exceptions 620 may be raised as for urllib2.urlopen(). 621 622 @param call: a url string that calls a method to a devserver. 623 @param timeout: The timeout seconds for this urlopen call. 624 625 @return A HTTPResponse object. 626 """ 627 if timeout is None: 628 return urllib.request.urlopen(call) 629 else: 630 return utils.urlopen_socket_timeout(call, timeout=timeout) 631 632 @classmethod 633 def run_call(cls, call, readline=False, timeout=None): 634 """Invoke a given devserver call using urllib.open. 635 636 Open the URL with HTTP, and return the text of the response. Exceptions 637 may be raised as for urllib2.urlopen(). 638 639 @param call: a url string that calls a method to a devserver. 640 @param readline: whether read http response line by line. 641 @param timeout: The timeout seconds for this urlopen call. 642 643 @return the results of this call. 644 """ 645 response = cls.run_request(call, timeout=timeout) 646 if readline: 647 return [line.rstrip() for line in response] 648 else: 649 return response.read() 650 651 652 @staticmethod 653 def servers(): 654 """Returns a list of servers that can serve as this type of server.""" 655 raise NotImplementedError() 656 657 658 @classmethod 659 def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT, 660 unrestricted_only=False): 661 """Get the devservers in the same subnet of the given ip. 662 663 @param ip: The IP address of a dut to look for devserver. 664 @param mask_bits: Number of mask bits. Default is 19. 665 @param unrestricted_only: Set to True to select from devserver in 666 unrestricted subnet only. Default is False. 
667 668 @return: A list of devservers in the same subnet of the given ip. 669 670 """ 671 # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so 672 # we need a dict to return the full devserver path once the IPs are 673 # filtered in get_servers_in_same_subnet. 674 server_names = {} 675 all_devservers = [] 676 devservers = (cls.get_unrestricted_devservers() if unrestricted_only 677 else cls.servers()) 678 for server in devservers: 679 server_name = get_hostname(server) 680 server_names[server_name] = server 681 all_devservers.append(server_name) 682 if not all_devservers: 683 devserver_type = 'unrestricted only' if unrestricted_only else 'all' 684 raise DevServerFailToLocateException( 685 'Fail to locate a devserver for dut %s in %s devservers' 686 % (ip, devserver_type)) 687 688 devservers = utils.get_servers_in_same_subnet(ip, mask_bits, 689 all_devservers) 690 return [server_names[s] for s in devservers] 691 692 693 @classmethod 694 def get_unrestricted_devservers( 695 cls, restricted_subnets=utils.RESTRICTED_SUBNETS): 696 """Get the devservers not in any restricted subnet specified in 697 restricted_subnets. 698 699 @param restricted_subnets: A list of restriected subnets. 700 701 @return: A list of devservers not in any restricted subnet. 702 703 """ 704 if not restricted_subnets: 705 return cls.servers() 706 707 metrics.Counter('chromeos/autotest/devserver/unrestricted_hotfix') 708 return cls.servers() 709 710 @classmethod 711 def get_healthy_devserver(cls, build, devservers, ban_list=None): 712 """"Get a healthy devserver instance from the list of devservers. 713 714 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 715 @param devservers: The devserver list to be chosen out a healthy one. 716 @param ban_list: The ban_list of devservers we don't want to choose. 717 Default is None. 718 719 @return: A DevServer object of a healthy devserver. Return None if no 720 healthy devserver is found. 
721 722 """ 723 logging.debug('Pick one healthy devserver from %r', devservers) 724 while devservers: 725 hash_index = hash(build) % len(devservers) 726 devserver = devservers.pop(hash_index) 727 logging.debug('Check health for %s', devserver) 728 if ban_list and devserver in ban_list: 729 continue 730 731 if cls.devserver_healthy(devserver): 732 logging.debug('Pick %s', devserver) 733 return cls(devserver) 734 735 736 @classmethod 737 def get_available_devservers(cls, 738 hostname=None, 739 prefer_local_devserver=PREFER_LOCAL_DEVSERVER, 740 restricted_subnets=utils.ALL_SUBNETS): 741 """Get devservers in the same subnet of the given hostname. 742 743 @param hostname: Hostname of a DUT to choose devserver for. 744 @param prefer_local_devserver: A boolean indicating using a devserver in 745 the same subnet with the DUT. 746 @param restricted_subnets: A list of restricted subnets or p2p subnet 747 groups. 748 749 @return: A tuple of (devservers, can_retry), devservers is a list of 750 devservers that's available for the given hostname. can_retry 751 is a flag that indicate if caller can retry the selection of 752 devserver if no devserver in the returned devservers can be 753 used. For example, if hostname is in a restricted subnet, 754 can_retry will be False. 755 """ 756 logging.info('Getting devservers for host: %s', hostname) 757 host_ip = None 758 if hostname: 759 host_ip = bin_utils.get_ip_address(hostname) 760 if not host_ip: 761 logging.error('Failed to get IP address of %s. Will pick a ' 762 'devserver without subnet constraint.', hostname) 763 764 if not host_ip: 765 return cls.get_unrestricted_devservers(restricted_subnets), False 766 767 # For the sake of backward compatibility, we use the argument 768 # 'restricted_subnets' to store both the legacy subnets (a tuple of 769 # (ip, mask)) and p2p subnets group (a list of subnets, i.e. [(ip, 770 # mask), ...]) data. 
For consistency, we convert all legacy subnets to 771 # a "singleton p2p subnets" and store them in a new list. 772 all_subnets = [] 773 for s in restricted_subnets: 774 if isinstance(s, tuple): 775 all_subnets.append([s]) 776 else: 777 all_subnets.append(s) 778 779 # Find devservers in the subnets reachable from the DUT. 780 if host_ip and all_subnets: 781 subnet_group = _get_subnet_group_for_host_ip( 782 host_ip, all_subnets=all_subnets) 783 if subnet_group: 784 devservers = set() 785 for ip, mask in subnet_group: 786 logging.debug( 787 'The host %s (%s) is in a restricted subnet ' 788 '(or its peers). ' 789 'Try to locate devservers inside subnet ' 790 '%s/%d.', hostname, host_ip, ip, mask) 791 devservers |= set( 792 cls.get_devservers_in_same_subnet(ip, mask)) 793 return sorted(devservers), False 794 795 # If prefer_local_devserver is set to True and the host is not in 796 # restricted subnet, pick a devserver in the same subnet if possible. 797 # Set can_retry to True so it can pick a different devserver if all 798 # devservers in the same subnet are down. 799 if prefer_local_devserver: 800 return (cls.get_devservers_in_same_subnet( 801 host_ip, DEFAULT_SUBNET_MASKBIT, True), True) 802 803 return cls.get_unrestricted_devservers(restricted_subnets), False 804 805 806 @classmethod 807 def resolve(cls, build, hostname=None, ban_list=None): 808 """"Resolves a build to a devserver instance. 809 810 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 811 @param hostname: The hostname of dut that requests a devserver. It's 812 used to make sure a devserver in the same subnet is 813 preferred. 814 @param ban_list: The ban_list of devservers shouldn't be chosen. 815 816 @raise DevServerException: If no devserver is available. 
817 """ 818 tried_devservers = set() 819 devservers, can_retry = cls.get_available_devservers(hostname) 820 if devservers: 821 tried_devservers |= set(devservers) 822 823 devserver = cls.get_healthy_devserver(build, devservers, 824 ban_list=ban_list) 825 826 if not devserver and can_retry: 827 # Find available devservers without dut location constrain. 828 devservers, _ = cls.get_available_devservers() 829 devserver = cls.get_healthy_devserver(build, devservers, 830 ban_list=ban_list) 831 if devservers: 832 tried_devservers |= set(devservers) 833 if devserver: 834 return devserver 835 else: 836 subnet = 'unrestricted subnet' 837 if hostname is not None: 838 host_ip = bin_utils.get_ip_address(hostname) 839 if host_ip: 840 subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip) 841 subnet = '%s/%s' % (str(subnet_ip), str(mask_bits)) 842 843 error_msg = ('All devservers in subnet: %s are currently down: ' 844 '%s. (dut hostname: %s)' % 845 (subnet, tried_devservers, hostname)) 846 logging.error(error_msg) 847 c = metrics.Counter( 848 'chromeos/autotest/devserver/subnet_without_devservers') 849 c.increment(fields={'subnet': subnet, 'hostname': str(hostname)}) 850 raise DevServerException(error_msg) 851 852 853 @classmethod 854 def random(cls): 855 """Return a random devserver that's available. 856 857 Devserver election in `resolve` method is based on a hash of the 858 build that a caller wants to stage. The purpose is that different 859 callers requesting for the same build can get the same devserver, 860 while the lab is able to distribute different builds across all 861 devservers. That helps to reduce the duplication of builds across 862 all devservers. 863 This function returns a random devserver, by passing a random 864 pseudo build name to `resolve `method. 
865 """ 866 return cls.resolve(build=str(time.time())) 867 868 869class CrashServer(DevServer): 870 """Class of DevServer that symbolicates crash dumps.""" 871 872 @staticmethod 873 def servers(): 874 return _get_crash_server_list() 875 876 877 @remote_devserver_call() 878 def symbolicate_dump(self, minidump_path, build): 879 """Ask the devserver to symbolicate the dump at minidump_path. 880 881 Stage the debug symbols for |build| and, if that works, ask the 882 devserver to symbolicate the dump at |minidump_path|. 883 884 @param minidump_path: the on-disk path of the minidump. 885 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 886 whose debug symbols are needed for symbolication. 887 @return The contents of the stack trace 888 @raise DevServerException upon any return code that's not HTTP OK. 889 """ 890 try: 891 import requests 892 except ImportError: 893 logging.warning("Can't 'import requests' to connect to dev server.") 894 return '' 895 f = {'dev_server': self.resolved_hostname} 896 c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump') 897 c.increment(fields=f) 898 # Symbolicate minidump. 899 m = 'chromeos/autotest/crashserver/symbolicate_dump_duration' 900 with metrics.SecondsTimer(m, fields=f): 901 call = self.build_call('symbolicate_dump', 902 archive_url=_get_image_storage_server() + build) 903 request = requests.post( 904 call, files={'minidump': open(minidump_path, 'rb')}) 905 if request.status_code == requests.codes.OK: 906 return request.text 907 908 error_fd = six.StringIO(request.text) 909 raise urllib.error.HTTPError( 910 call, request.status_code, request.text, request.headers, 911 error_fd) 912 913 914 @classmethod 915 def get_available_devservers(cls, hostname): 916 """Get all available crash servers. 917 918 Crash server election doesn't need to count the location of hostname. 919 920 @param hostname: Hostname of a DUT to choose devserver for. 921 922 @return: A tuple of (all crash servers, False). 
@staticmethod
def create_metadata(server_name, image, artifacts=None, files=None):
    """Build the metadata dictionary describing a set of staged items.

    The metadata can be sent to the metadata db along with stats.

    @param server_name: name of the devserver, e.g 172.22.33.44.
    @param image: The name of the image.
    @param artifacts: A list of artifacts; omitted from the result when
                      empty or None.
    @param files: A list of files; omitted from the result when empty or
                  None.

    @return A metadata dictionary with keys 'devserver', 'image' and
            '_type', plus space-joined 'artifacts' / 'files' entries when
            those were supplied.
    """
    info = dict(devserver=server_name, image=image, _type='devserver')
    # Only non-empty collections get an entry, joined with single spaces.
    for key, names in (('artifacts', artifacts), ('files', files)):
        if names:
            info[key] = ' '.join(names)
    return info
986 @param timeout: The timeout seconds for ssh call. 987 988 @return the results of this call. 989 """ 990 hostname = get_hostname(call) 991 ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call)) 992 timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60 993 try: 994 result = utils.run(ssh_call, timeout=timeout_seconds) 995 except error.CmdError as e: 996 logging.debug('Error occurred with exit_code %d when executing the ' 997 'ssh call: %s.', e.result_obj.exit_status, 998 e.result_obj.stderr) 999 c = metrics.Counter('chromeos/autotest/devserver/ssh_failure') 1000 c.increment(fields={'dev_server': hostname}) 1001 raise 1002 response = result.stdout 1003 1004 # If the curl command's returned HTTP response contains certain 1005 # exception string, raise the DevServerException of the response. 1006 if 'DownloaderException' in response: 1007 raise DevServerException(_strip_http_message(response)) 1008 1009 if readline: 1010 # Remove line terminators and trailing whitespace 1011 response = response.splitlines() 1012 return [line.rstrip() for line in response] 1013 1014 return response 1015 1016 1017 @classmethod 1018 def run_call(cls, call, readline=False, timeout=None): 1019 """Invoke a given devserver call using urllib.open or ssh. 1020 1021 Open the URL with HTTP or SSH-based HTTP, and return the text of the 1022 response. Exceptions may be raised as for urllib2.urlopen() or 1023 utils.run(). 1024 1025 @param call: a url string that calls a method to a devserver. 1026 @param readline: whether read http response line by line. 1027 @param timeout: The timeout seconds for urlopen call or ssh call. 1028 1029 @return the results of this call. 1030 """ 1031 server_name = get_hostname(call) 1032 is_in_restricted_subnet = utils.get_restricted_subnet( 1033 server_name, utils.get_all_restricted_subnets()) 1034 _EMPTY_SENTINEL_VALUE = object() 1035 def kickoff_call(): 1036 """Invoke a given devserver call using urllib.open or ssh. 
@classmethod
def download_file(cls, remote_file, local_file, timeout=None):
    """Download file from devserver.

    The format of remote_file should be:
    http://devserver_ip:8082/static/board/...

    When ssh connections are disabled, or the devserver is not in a
    restricted subnet, the file is fetched through the parent class'
    run_request. Otherwise `curl` is run on the devserver over ssh with
    its stdout attached to the local file.

    @param remote_file: The URL of the file on devserver that need to be
                        downloaded.
    @param local_file: The path of the file saved to local.
    @param timeout: The timeout seconds for this call. For the ssh path a
                    falsy value falls back to
                    DEVSERVER_SSH_TIMEOUT_MINS * 60.

    @raise DevServerException if the ssh command exits with a non-zero
           code (including when it is killed on timeout).
    """
    server_name = get_hostname(remote_file)
    is_in_restricted_subnet = utils.get_restricted_subnet(
            server_name, utils.get_all_restricted_subnets())

    if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER
                or not is_in_restricted_subnet):
        response = super(ImageServerBase, cls).run_request(remote_file,
                                                           timeout=timeout)
        with open(local_file, 'wb') as out_log:
            shutil.copyfileobj(response, out_log)
        return

    timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS * 60
    # SSH to the dev server and attach the local file as stdout.
    with open(local_file, 'wb') as out_log:
        ssh_cmd = [
                'ssh', server_name,
                'curl -s -S -f "%s"' % utils.sh_escape(remote_file)
        ]
        logging.debug("Running command %s", ssh_cmd)
        with open(os.devnull) as devnull:
            cmd = subprocess.Popen(
                    ssh_cmd,
                    stdout=out_log,
                    stdin=devnull,
                    stderr=subprocess.PIPE,
            )

            # Python 2.7 doesn't have Popen.wait(timeout), so start a
            # timer and kill the ssh process if it takes too long.
            def stop_process():
                """Kills the subprocess after the timeout."""
                cmd.kill()
                logging.error("ssh call timed out after %s secs",
                              timeout_seconds)

            t = Timer(timeout_seconds, stop_process)
            try:
                t.start()
                # communicate() drains stderr while waiting; a bare
                # wait() can deadlock once the child fills the stderr
                # pipe buffer.
                _, error_output = cmd.communicate()
            finally:
                t.cancel()
            if error_output:
                logging.error("ssh call output: %s", error_output)
    if cmd.returncode != 0:
        c = metrics.Counter(
                'chromeos/autotest/devserver/ssh_failure')
        c.increment(fields={'dev_server': server_name})
        # Interpolate the exit code here: exceptions do not apply
        # logging-style lazy formatting to extra positional args.
        raise DevServerException(
                "ssh call failed with exit code %s" % cmd.returncode)
def _call_and_wait(self, call_name, error_message,
                   expected_response=SUCCESS, **kwargs):
    """Issue an async devserver RPC, then block until artifacts are staged.

    @param call_name: name of devserver rpc call.
    @param error_message: Error message to be thrown if response does not
                          match expected_response.
    @param expected_response: Expected response from rpc, default to
                              |Success|. If it's set to None, do not
                              compare the actual response. Any response is
                              considered to be good.
    @param kwargs: keyword arguments used to build the rpc call and, minus
                   `os_type`, passed on to wait_for_artifacts_staged.

    @return: The response from rpc.
    @raise DevServerOverloadException when the response contains the
           invalid-devserver-response marker (raised after a 30 second
           sleep).
    @raise DevServerException on a bad status line, or when the response
           differs from a non-empty expected_response.

    """
    rpc_url = self.build_call(call_name, is_async=True, **kwargs)
    try:
        reply = self.run_call(rpc_url)
        logging.debug('response for RPC: %r', reply)
        hit_proxy_error = ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in reply
        if hit_proxy_error:
            logging.debug('Proxy error happens in RPC call, '
                          'will retry in 30 seconds')
            time.sleep(30)
            raise DevServerOverloadException()
    except six.moves.http_client.BadStatusLine as bad_status:
        logging.error(bad_status)
        message = ('Received Bad Status line, Devserver %s '
                   'might have gone down while handling '
                   'the call: %s' % (self.url(), rpc_url))
        raise DevServerException(message)

    if expected_response and reply != expected_response:
        raise DevServerException(error_message)

    # `os_type` is needed to build a devserver call, but not needed for
    # wait_for_artifacts_staged, since that method is implemented by
    # each ImageServerBase child class.
    kwargs.pop('os_type', None)
    self.wait_for_artifacts_staged(**kwargs)
    return reply
1244 1245 @param artifacts: A list of artifacts. 1246 @param files: A list of files to stage. 1247 @param archive_url: Optional parameter that has the archive_url to stage 1248 this artifact from. Default is specified in autotest config + 1249 image. 1250 @param kwargs: keyword arguments that specify the build information, to 1251 make stage devserver call. 1252 1253 @raise DevServerException upon any return code that's not HTTP OK. 1254 """ 1255 if not archive_url: 1256 archive_url = os.path.join( 1257 _get_storage_server_for_artifacts(artifacts), build) 1258 1259 artifacts_arg = ','.join(artifacts) if artifacts else '' 1260 files_arg = ','.join(files) if files else '' 1261 error_message = ("staging %s for %s failed;" 1262 "HTTP OK not accompanied by 'Success'." % 1263 ('artifacts=%s files=%s ' % (artifacts_arg, files_arg), 1264 build)) 1265 1266 staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' % 1267 (build, artifacts, files, archive_url)) 1268 logging.info('Staging artifacts on devserver %s: %s', 1269 self.url(), staging_info) 1270 success = False 1271 try: 1272 arguments = {'archive_url': archive_url, 1273 'artifacts': artifacts_arg, 1274 'files': files_arg} 1275 if kwargs: 1276 arguments.update(kwargs) 1277 f = {'artifacts': artifacts_arg, 1278 'dev_server': self.resolved_hostname} 1279 with metrics.SecondsTimer( 1280 'chromeos/autotest/devserver/stage_artifact_duration', 1281 fields=f): 1282 self.call_and_wait(call_name='stage', error_message=error_message, 1283 **arguments) 1284 logging.info('Finished staging artifacts: %s', staging_info) 1285 success = True 1286 except (bin_utils.TimeoutError, error.TimeoutException): 1287 logging.error('stage_artifacts timed out: %s', staging_info) 1288 raise DevServerException( 1289 'stage_artifacts timed out: %s' % staging_info) 1290 finally: 1291 f = {'success': success, 1292 'artifacts': artifacts_arg, 1293 'dev_server': self.resolved_hostname} 1294 
def _trigger_download(self, build, artifacts, files, synchronous=True,
                      **kwargs_build_info):
    """Tell the devserver to download and stage the image described by
    |build| / kwargs_build_info.

    Issues a `stage` call through call_and_wait. When no build info is
    given, the archive url is the image storage server
    (_get_image_storage_server()) plus |build|; otherwise archive_url is
    None and the build info is forwarded with the call.

    If |synchronous| is True and the call answered |Success|,
    _finish_download is invoked as well before returning. A caller that
    passes synchronous=False can call finish_download later to guarantee
    the rest of the artifacts have finished staging.

    @param build: build name, used for the archive url and in messages.
    @param artifacts: artifacts to stage, passed through to the rpc.
    @param files: files to stage, passed through to the rpc.
    @param synchronous: if True, also run _finish_download after a
                        successful stage call.
    @param kwargs_build_info: Dictionary of build information.
                              For CrOS, it is None as build is the CrOS
                              image name.
                              For Android, it is {'target': target,
                                                  'build_id': build_id,
                                                  'branch': branch}

    @raise DevServerException if the stage call times out.

    """
    archive_url = (None if kwargs_build_info
                   else _get_image_storage_server() + build)
    failure_text = ("trigger_download for %s failed;"
                    "HTTP OK not accompanied by 'Success'." % build)
    stage_args = dict(archive_url=archive_url,
                      artifacts=artifacts,
                      files=files,
                      error_message=failure_text)
    # Updating with an empty dict is a no-op, so no guard is needed.
    stage_args.update(kwargs_build_info)

    logging.info('trigger_download starts for %s', build)
    try:
        reply = self.call_and_wait(call_name='stage', **stage_args)
    except (bin_utils.TimeoutError, error.TimeoutException):
        logging.error('trigger_download timed out for %s.', build)
        raise DevServerException(
                'trigger_download timed out for %s.' % build)
    else:
        logging.info('trigger_download finishes for %s', build)

    if synchronous and reply == SUCCESS:
        self._finish_download(build, artifacts, files, **kwargs_build_info)
@remote_devserver_call()
def locate_file(self, file_name, artifacts, build, build_info):
    """Locate a file with the given file_name on devserver.

    Calls the devserver RPC `locate_file` to look up the file inside the
    specified build artifacts, and joins the returned path onto this
    devserver's static url.

    @param file_name: Name of the file to look for.
    @param artifacts: A list of artifact names to search for the file.
    @param build: Name of the build. For Android, it's None as build_info
                  should be used.
    @param build_info: Dictionary of build information.
                       For CrOS, it is None as build is the CrOS image
                       name.
                       For Android, it is {'target': target,
                                           'build_id': build_id,
                                           'branch': branch}

    @return: A devserver url to the file.
    @raise DevServerException when neither build nor build_info is given,
           or when the devserver answers with a bad status line.
    """
    if not (build or build_info):
        raise DevServerException('You must specify build information to '
                                 'look for file %s in artifacts %s.' %
                                 (file_name, artifacts))

    rpc_args = {'file_name': file_name,
                'artifacts': artifacts}
    if not build_info:
        static_subdir = build
        rpc_args['build'] = build
    else:
        static_subdir = '%(branch)s/%(target)s/%(build_id)s' % build_info
        rpc_args.update(build_info)
        # Devserver treats Android and Brillo build in the same way as they
        # are both retrieved from Launch Control and have similar build
        # artifacts. Therefore, os_type for devserver calls is `android` for
        # both Android and Brillo builds.
        rpc_args['os_type'] = 'android'

    rpc_url = self.build_call('locate_file', is_async=False, **rpc_args)
    try:
        relative_path = self.run_call(rpc_url)
        return os.path.join(self.url(), 'static', static_subdir,
                            relative_path)
    except six.moves.http_client.BadStatusLine as bad_status:
        logging.error(bad_status)
        raise DevServerException('Received Bad Status line, Devserver %s '
                                 'might have gone down while handling '
                                 'the call: %s' % (self.url(), rpc_url))
@remote_devserver_call()
def list_suite_controls(self, build, suite_name=''):
    """Ask the devserver to list contents of all control files for |build|.

    @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                  whose control files' contents the caller wants returned.
    @param suite_name: The name of the suite for which we require control
                       files.
    @return The JSON-decoded devserver response, expected to be a dict of
            contents of all control files
            (e.g. {'path1': "#Copyright controls ***", ...,
                   'pathX': "#Copyright controls ***"})
    @raise DevServerException upon any return code that's not HTTP OK.
    @raise ValueError if the response is not valid JSON.
    """
    build = self.translate(build)
    call = self.build_call('list_suite_controls', build=build,
                           suite_name=suite_name)
    # Decode the response text directly; wrapping it in a StringIO just
    # to hand it to json.load is an unnecessary round-trip.
    return json.loads(self.run_call(call))
def wait_for_artifacts_staged(self,
                              archive_url,
                              artifacts='',
                              files='',
                              **kwargs):
    """Poll devserver.is_staged until all artifacts are staged.

    @param archive_url: Google Storage URL for the build.
    @param artifacts: Comma separated list of artifacts to download.
    @param files: Comma separated list of files to download.
    @param kwargs: extra keyword arguments; accepted but not forwarded to
                   the is_staged call.
    @return: True if all artifacts are staged in devserver.
    """
    # Only these three arguments reach the is_staged call; anything in
    # **kwargs is intentionally left out, as in the original.
    return self._poll_is_staged(archive_url=archive_url,
                                artifacts=artifacts,
                                files=files)
@remote_devserver_call()
def stage_artifacts(self, image=None, artifacts=None, files='',
                    archive_url=None, **kwargs):
    """Tell the devserver to download and stage |artifacts| from |image|.

    This is the main call point for staging any specific artifacts for a
    given build. To see the list of artifacts one can stage see:

    ~src/platform/dev/artifact_info.py.

    This is maintained along with the actual devserver code.

    @param image: the image to fetch and stage; it is passed through
                  translate() first.
    @param artifacts: A list of artifacts.
    @param files: A list of files to stage.
    @param archive_url: Optional parameter that has the archive_url to stage
            this artifact from. Default is specified in autotest config +
            image.
    @param kwargs: keyword arguments that specify the build information, to
            make stage devserver call.

    @raise DevServerException when neither artifacts nor files is given,
           and upon any return code that's not HTTP OK.
    """
    nothing_requested = not (artifacts or files)
    if nothing_requested:
        raise DevServerException('Must specify something to stage.')
    resolved_image = self.translate(image)
    self._stage_artifacts(resolved_image, artifacts, files, archive_url,
                          **kwargs)
def trigger_download(self, image, synchronous=True):
    """Tell the devserver to download and stage |image|.

    Translates |image| and delegates to _trigger_download with the
    artifact list in _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE and no extra files.

    If |synchronous| is True, waits for the entire download to finish
    staging before returning. Otherwise only the artifacts necessary
    to start installing images onto DUT's will be staged before returning.
    A caller can then call finish_download to guarantee the rest of the
    artifacts have finished staging.

    @param image: the image to fetch and stage.
    @param synchronous: if True, waits until all components of the image are
           staged before returning.

    @raise DevServerException upon any return code that's not HTTP OK.

    """
    self._trigger_download(self.translate(image),
                           _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE,
                           files='',
                           synchronous=synchronous)
def get_staged_file_url(self, filename, image):
    """Returns the url of a staged file for this image on the devserver.

    @param filename: name of the staged file, appended after a '/'.
    @param image: the image that was fetched.

    @return The image url from _get_image_url joined with |filename|.
    """
    image_dir_url = self._get_image_url(image)
    return image_dir_url + '/' + filename
def translate(self, build_name):
    """Translate the build name if it's in LATEST format.

    If the build name is in the format [builder]/LATEST (matched
    case-insensitively), return the latest build in Google Storage as
    reported by get_latest_build_in_gs; otherwise return the build name
    as is.

    @param build_name: build_name to check.

    @return The actual build name to use.
    """
    latest_ref = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
    if latest_ref is None:
        return build_name
    # The first group is everything before the final '-<word>' of the
    # builder name; that is what gets looked up.
    board = latest_ref.group(1)
    resolved = self.get_latest_build_in_gs(board)
    logging.debug('Translated relative build %s to %s', build_name,
                  resolved)
    return resolved
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and returns |Success| without waiting
    for staging to complete. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    Such mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and cherrypy
    starts with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only forwarded
                            when it is truthy.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the CrOS build. Only
                            forwarded when it is truthy.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.

        @return: The response from rpc.
        @raise DevServerException upon any return code that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id. Only parsed when none of
                      target, build_id and branch is given.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException if the build info is incomplete, if there
               is nothing to stage, or upon any return code that's not HTTP
               OK.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        # This is the single validation point for the build info: past it,
        # target, build_id and branch are all guaranteed to be truthy.
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')

        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo).
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo).

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        board = target.split('-')[0]
        # Honor |os| so the artifact list waited on here is the same default
        # list trigger_download requested for that OS.
        artifacts = (
            android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                    board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch,
                              'os_type': 'android'}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.
        The LATEST marker is matched case-insensitively.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
def _compare_load(devserver1, devserver2):
    """Comparator function to compare load between two devservers.

    Only the DevServer.DISK_IO stat is compared.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: -1 if the disk IO of `devserver1` is less than that of
             `devserver2`, 1 if it is greater, and 0 if they are equal.

    """
    disk_io1 = devserver1[DevServer.DISK_IO]
    disk_io2 = devserver2[DevServer.DISK_IO]
    # Compare signs instead of truncating the difference with int(): a
    # fractional difference such as 0.4 would otherwise collapse to 0 and make
    # two differently loaded devservers look equal.
    return (disk_io1 > disk_io2) - (disk_io1 < disk_io2)


def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Get the subnet for a given host IP.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each a
                               (subnet_ip, mask_bits) pair.

    @return: a (subnet_ip, mask_bits) tuple for the first subnet that contains
             host_ip. If no matched subnet for the host_ip, return
             (None, None).
    """
    for subnet_ip, mask_bits in restricted_subnets:
        if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits):
            return subnet_ip, mask_bits

    return None, None
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Get the devserver with the least load.

    Iterate through all devservers and get the one with least load.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None if there is
             no healthy devserver to choose from, no load stats could be
             retrieved, or no devserver is below the load thresholds.

    """
    # Local import: cmp_to_key exists on both Python 2.7 and Python 3, whereas
    # the `cmp` argument of sorted() was removed in Python 3.
    import functools

    logging.debug('Get the least loaded %r', devserver_type)
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # If no healthy devservers available and can_retry is False, return None.
    # Otherwise, relax the constraint on hostname, allow all devservers to be
    # available.
    if not devserver_type.get_healthy_devserver('', devservers):
        if not can_retry:
            return None
        else:
            devservers, _ = devserver_type.get_available_devservers()

    # get_devserver_load call needs to be made in a new process to allow force
    # timeout using signal.
    output = multiprocessing.Queue()
    processes = []
    for devserver in devservers:
        processes.append(multiprocessing.Process(
                target=devserver_type.get_devserver_load_wrapper,
                args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output)))

    for p in processes:
        p.start()
    for p in processes:
        # The timeout for the process commands aren't reliable. Add
        # some extra time to the timeout for potential overhead in the
        # subprocesses. crbug.com/913695
        p.join(TIMEOUT_GET_DEVSERVER_LOAD + 10)
    # Read queue before killing processes to avoid corrupting the queue.
    # One result is read per process that has already exited.
    loads = [output.get() for p in processes if not p.is_alive()]
    for p in processes:
        if p.is_alive():
            p.terminate()
    # Filter out any load failed to be retrieved or does not support load check.
    loads = [load for load in loads if load and DevServer.CPU_LOAD in load and
             DevServer.is_free_disk_ok(load) and
             DevServer.is_apache_client_count_ok(load)]
    if not loads:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None
    loads = [load for load in loads if _is_load_healthy(load)]
    if not loads:
        logging.error('No devserver has the capacity to be selected.')
        return None
    # sorted(..., cmp=...) raises TypeError on Python 3; wrap the comparator
    # with cmp_to_key so the same ordering works on both interpreters.
    loads = sorted(loads, key=functools.cmp_to_key(_compare_load))
    return loads[0]['devserver']
2221 2222 @param build: Name of a build to stage on devserver, e.g., 2223 ChromeOS build: daisy-release/R50-1234.0.0 2224 Launch Control build: git_mnc_release/shamu-eng 2225 @param hostname: Hostname of a devserver for, default is None, which means 2226 devserver is not restricted by the network location of the host. 2227 @param ban_list: The ban_list of devservers shouldn't be chosen. 2228 2229 @return: A DevServer instance that can be used to stage given build for the 2230 given host. 2231 """ 2232 if utils.is_launch_control_build(build): 2233 return AndroidBuildServer.resolve(build, hostname) 2234 else: 2235 return ImageServer.resolve(build, hostname, ban_list=ban_list) 2236