1*9c5db199SXin Li# Lint as: python2, python3 2*9c5db199SXin Li# Copyright (c) 2014 The Chromium OS Authors. All rights reserved. 3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be 4*9c5db199SXin Li# found in the LICENSE file. 5*9c5db199SXin Li 6*9c5db199SXin Lifrom __future__ import absolute_import 7*9c5db199SXin Lifrom __future__ import division 8*9c5db199SXin Lifrom __future__ import print_function 9*9c5db199SXin Li 10*9c5db199SXin Liimport logging 11*9c5db199SXin Liimport os 12*9c5db199SXin Liimport re 13*9c5db199SXin Liimport time 14*9c5db199SXin Li 15*9c5db199SXin Liimport common 16*9c5db199SXin Lifrom autotest_lib.client.common_lib import error, global_config 17*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import retry 18*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import frontend_wrappers 19*9c5db199SXin Lifrom autotest_lib.server.hosts import cros_host 20*9c5db199SXin Lifrom autotest_lib.server.hosts import cros_repair 21*9c5db199SXin Li 22*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import timeout_util 23*9c5db199SXin Liimport six 24*9c5db199SXin Li 25*9c5db199SXin LiAUTOTEST_INSTALL_DIR = global_config.global_config.get_config_value( 26*9c5db199SXin Li 'SCHEDULER', 'drone_installation_directory') 27*9c5db199SXin Li 28*9c5db199SXin Li#'/usr/local/autotest' 29*9c5db199SXin LiSHADOW_CONFIG_PATH = '%s/shadow_config.ini' % AUTOTEST_INSTALL_DIR 30*9c5db199SXin LiATEST_PATH = '%s/cli/atest' % AUTOTEST_INSTALL_DIR 31*9c5db199SXin Li 32*9c5db199SXin Li# Sample output of fping that we are matching against, the fping command 33*9c5db199SXin Li# will return 10 lines but they will be one of these two formats. 34*9c5db199SXin Li# We want to get the IP address for the first line and not match the 35*9c5db199SXin Li# second line that has a non 0 %loss. 36*9c5db199SXin Li#192.168.231.100 : xmt/rcv/%loss = 10/10/0%, min/avg/max = 0.68/0.88/1.13 37*9c5db199SXin Li#192.168.231.102 : xmt/rcv/%loss = 10/0/100% 38*9c5db199SXin LiSUBNET_DUT_SEARCH_RE = (r'(?P<ip>192.168.231.1[0-1][0-9]) : ' 39*9c5db199SXin Li 'xmt\/rcv\/%loss = [0-9]+\/[0-9]+\/0%') 40*9c5db199SXin Li 41*9c5db199SXin LiMOBLAB_HOME = '/home/moblab' 42*9c5db199SXin LiMOBLAB_BOTO_LOCATION = '%s/.boto' % MOBLAB_HOME 43*9c5db199SXin LiMOBLAB_LAUNCH_CONTROL_KEY_LOCATION = '%s/.launch_control_key' % MOBLAB_HOME 44*9c5db199SXin LiMOBLAB_SERVICE_ACCOUNT_LOCATION = '%s/.service_account.json' % MOBLAB_HOME 45*9c5db199SXin LiMOBLAB_AUTODIR = '/usr/local/autodir' 46*9c5db199SXin LiDHCPD_LEASE_FILE = '/var/lib/dhcp/dhcpd.leases' 47*9c5db199SXin LiMOBLAB_SERVICES = ['moblab-scheduler-init', 48*9c5db199SXin Li 'moblab-database-init', 49*9c5db199SXin Li 'moblab-devserver-init', 50*9c5db199SXin Li 'moblab-gsoffloader-init', 51*9c5db199SXin Li 'moblab-gsoffloader_s-init'] 52*9c5db199SXin LiMOBLAB_PROCESSES = ['apache2', 'dhcpd'] 53*9c5db199SXin LiDUT_VERIFY_SLEEP_SECS = 5 54*9c5db199SXin LiDUT_VERIFY_TIMEOUT = 15 * 60 55*9c5db199SXin LiMOBLAB_TMP_DIR = '/mnt/moblab/tmp' 56*9c5db199SXin LiMOBLAB_PORT = 80 57*9c5db199SXin Li 58*9c5db199SXin Li 59*9c5db199SXin Liclass UpstartServiceNotRunning(error.AutoservError): 60*9c5db199SXin Li """An expected upstart service was not in the expected state.""" 61*9c5db199SXin Li 62*9c5db199SXin Li def __init__(self, service_name): 63*9c5db199SXin Li """Create us. 64*9c5db199SXin Li @param service_name: Name of the service_name that was in the worng 65*9c5db199SXin Li state. 66*9c5db199SXin Li """ 67*9c5db199SXin Li super(UpstartServiceNotRunning, self).__init__( 68*9c5db199SXin Li 'Upstart service %s not in running state. Most likely this ' 69*9c5db199SXin Li 'means moblab did not boot correctly, check the boot logs ' 70*9c5db199SXin Li 'for detailed error messages as to see why this service was ' 71*9c5db199SXin Li 'not started.' % 72*9c5db199SXin Li service_name) 73*9c5db199SXin Li 74*9c5db199SXin Li 75*9c5db199SXin Liclass MoblabHost(cros_host.CrosHost): 76*9c5db199SXin Li """Moblab specific host class.""" 77*9c5db199SXin Li 78*9c5db199SXin Li 79*9c5db199SXin Li def _initialize_frontend_rpcs(self, timeout_min): 80*9c5db199SXin Li """Initialize frontends for AFE and TKO for a moblab host. 81*9c5db199SXin Li 82*9c5db199SXin Li We tunnel all communication to the frontends through an SSH tunnel as 83*9c5db199SXin Li many testing environments block everything except SSH access to the 84*9c5db199SXin Li moblab DUT. 85*9c5db199SXin Li 86*9c5db199SXin Li @param timeout_min: The timeout minuties for AFE services. 87*9c5db199SXin Li """ 88*9c5db199SXin Li web_address = self.rpc_server_tracker.tunnel_connect(MOBLAB_PORT) 89*9c5db199SXin Li # Pass timeout_min to self.afe 90*9c5db199SXin Li self.afe = frontend_wrappers.RetryingAFE(timeout_min=timeout_min, 91*9c5db199SXin Li user='moblab', 92*9c5db199SXin Li server=web_address) 93*9c5db199SXin Li # Use default timeout_min of MoblabHost for self.tko 94*9c5db199SXin Li self.tko = frontend_wrappers.RetryingTKO(timeout_min=self.timeout_min, 95*9c5db199SXin Li user='moblab', 96*9c5db199SXin Li server=web_address) 97*9c5db199SXin Li 98*9c5db199SXin Li 99*9c5db199SXin Li def _initialize(self, *args, **dargs): 100*9c5db199SXin Li super(MoblabHost, self)._initialize(*args, **dargs) 101*9c5db199SXin Li # TODO(jrbarnette): Our superclass already initialized 102*9c5db199SXin Li # _repair_strategy, and now we're re-initializing it here. 103*9c5db199SXin Li # That's awkward, if not actually wrong. 104*9c5db199SXin Li self._repair_strategy = cros_repair.create_moblab_repair_strategy() 105*9c5db199SXin Li self.timeout_min = dargs.get('rpc_timeout_min', 1) 106*9c5db199SXin Li self._initialize_frontend_rpcs(self.timeout_min) 107*9c5db199SXin Li 108*9c5db199SXin Li 109*9c5db199SXin Li @staticmethod 110*9c5db199SXin Li def check_host(host, timeout=10): 111*9c5db199SXin Li """ 112*9c5db199SXin Li Check if the given host is an moblab host. 113*9c5db199SXin Li 114*9c5db199SXin Li @param host: An ssh host representing a device. 115*9c5db199SXin Li @param timeout: The timeout for the run command. 116*9c5db199SXin Li 117*9c5db199SXin Li 118*9c5db199SXin Li @return: True if the host device has adb. 119*9c5db199SXin Li 120*9c5db199SXin Li @raises AutoservRunError: If the command failed. 121*9c5db199SXin Li @raises AutoservSSHTimeout: Ssh connection has timed out. 122*9c5db199SXin Li """ 123*9c5db199SXin Li return False 124*9c5db199SXin Li 125*9c5db199SXin Li 126*9c5db199SXin Li def install_boto_file(self, boto_path=''): 127*9c5db199SXin Li """Install a boto file on the Moblab device. 128*9c5db199SXin Li 129*9c5db199SXin Li @param boto_path: Path to the boto file to install. If None, sends the 130*9c5db199SXin Li boto file in the current HOME directory. 131*9c5db199SXin Li 132*9c5db199SXin Li @raises error.TestError if the boto file does not exist. 133*9c5db199SXin Li """ 134*9c5db199SXin Li if not boto_path: 135*9c5db199SXin Li boto_path = os.path.join(os.getenv('HOME'), '.boto') 136*9c5db199SXin Li if not os.path.exists(boto_path): 137*9c5db199SXin Li raise error.TestError('Boto File:%s does not exist.' % boto_path) 138*9c5db199SXin Li self.send_file(boto_path, MOBLAB_BOTO_LOCATION) 139*9c5db199SXin Li self.run('chown moblab:moblab %s' % MOBLAB_BOTO_LOCATION) 140*9c5db199SXin Li 141*9c5db199SXin Li 142*9c5db199SXin Li def get_autodir(self): 143*9c5db199SXin Li """Return the directory to install autotest for client side tests.""" 144*9c5db199SXin Li return self.autodir or MOBLAB_AUTODIR 145*9c5db199SXin Li 146*9c5db199SXin Li 147*9c5db199SXin Li def run_as_moblab(self, command, **kwargs): 148*9c5db199SXin Li """Moblab commands should be ran as the moblab user not root. 149*9c5db199SXin Li 150*9c5db199SXin Li @param command: Command to run as user moblab. 151*9c5db199SXin Li """ 152*9c5db199SXin Li command = "su - moblab -c '%s'" % command 153*9c5db199SXin Li return self.run(command, **kwargs) 154*9c5db199SXin Li 155*9c5db199SXin Li 156*9c5db199SXin Li def wait_afe_up(self, timeout_min=5): 157*9c5db199SXin Li """Wait till the AFE is up and loaded. 158*9c5db199SXin Li 159*9c5db199SXin Li Attempt to reach the Moblab's AFE and database through its RPC 160*9c5db199SXin Li interface. 161*9c5db199SXin Li 162*9c5db199SXin Li @param timeout_min: Minutes to wait for the AFE to respond. Default is 163*9c5db199SXin Li 5 minutes. 164*9c5db199SXin Li 165*9c5db199SXin Li @raises urllib2.HTTPError if AFE does not respond within the timeout. 166*9c5db199SXin Li """ 167*9c5db199SXin Li # Use moblabhost's own AFE object with a longer timeout to wait for the 168*9c5db199SXin Li # AFE to load. Also re-create the ssh tunnel for connections to moblab. 169*9c5db199SXin Li # Set the timeout_min to be longer than self.timeout_min for rebooting. 170*9c5db199SXin Li self._initialize_frontend_rpcs(timeout_min) 171*9c5db199SXin Li # Verify the AFE can handle a simple request. 172*9c5db199SXin Li self._check_afe() 173*9c5db199SXin Li # Reset the timeout_min after rebooting checks for afe services. 174*9c5db199SXin Li self.afe.set_timeout(self.timeout_min) 175*9c5db199SXin Li 176*9c5db199SXin Li 177*9c5db199SXin Li def add_dut(self, hostname): 178*9c5db199SXin Li """Add a DUT hostname to the AFE. 179*9c5db199SXin Li 180*9c5db199SXin Li @param hostname: DUT hostname to add. 181*9c5db199SXin Li """ 182*9c5db199SXin Li result = self.run_as_moblab('%s host create %s' % (ATEST_PATH, 183*9c5db199SXin Li hostname)) 184*9c5db199SXin Li logging.debug('atest host create output for host %s:\n%s', 185*9c5db199SXin Li hostname, result.stdout) 186*9c5db199SXin Li 187*9c5db199SXin Li 188*9c5db199SXin Li def find_and_add_duts(self): 189*9c5db199SXin Li """Discover DUTs on the testing subnet and add them to the AFE. 190*9c5db199SXin Li 191*9c5db199SXin Li Pings the range of IP's a DUT might be assigned by moblab, then 192*9c5db199SXin Li parses the output to discover connected DUTs, connected means 193*9c5db199SXin Li they have 0% dropped pings. 194*9c5db199SXin Li If they are not already in the AFE, adds them to AFE. 195*9c5db199SXin Li """ 196*9c5db199SXin Li existing_hosts = [host.hostname for host in self.afe.get_hosts()] 197*9c5db199SXin Li fping_result = self.run('fping -g 192.168.231.100 192.168.231.110 ' 198*9c5db199SXin Li '-a -c 10 -p 30 -q', ignore_status=True) 199*9c5db199SXin Li for line in fping_result.stderr.splitlines(): 200*9c5db199SXin Li match = re.match(SUBNET_DUT_SEARCH_RE, line) 201*9c5db199SXin Li if match: 202*9c5db199SXin Li dut_ip = match.group('ip') 203*9c5db199SXin Li if dut_ip in existing_hosts: 204*9c5db199SXin Li break 205*9c5db199SXin Li if self._check_dut_ssh(dut_ip): 206*9c5db199SXin Li self.add_dut(dut_ip) 207*9c5db199SXin Li existing_hosts.append(dut_ip) 208*9c5db199SXin Li 209*9c5db199SXin Li def _check_dut_ssh(self, dut_ip): 210*9c5db199SXin Li is_sshable = False 211*9c5db199SXin Li count = 0 212*9c5db199SXin Li while not is_sshable and count < 10: 213*9c5db199SXin Li cmd = ('ssh -o ConnectTimeout=30 -o ConnectionAttempts=30' 214*9c5db199SXin Li ' root@%s echo Testing' % dut_ip) 215*9c5db199SXin Li result = self.run(cmd) 216*9c5db199SXin Li is_sshable = 'Testing' in result.stdout 217*9c5db199SXin Li logging.info(is_sshable) 218*9c5db199SXin Li count += 1 219*9c5db199SXin Li return is_sshable 220*9c5db199SXin Li 221*9c5db199SXin Li def verify_software(self): 222*9c5db199SXin Li """Create the autodir then do standard verify.""" 223*9c5db199SXin Li # In case cleanup or powerwash wiped the autodir, create an empty 224*9c5db199SXin Li # directory. 225*9c5db199SXin Li # Removing this mkdir command will result in the disk size check 226*9c5db199SXin Li # not being performed. 227*9c5db199SXin Li self.run('mkdir -p %s' % MOBLAB_AUTODIR) 228*9c5db199SXin Li super(MoblabHost, self).verify_software() 229*9c5db199SXin Li 230*9c5db199SXin Li 231*9c5db199SXin Li def _verify_upstart_service(self, service, timeout_m): 232*9c5db199SXin Li """Verify that the given moblab service is running. 233*9c5db199SXin Li 234*9c5db199SXin Li @param service: The upstart service to check for. 235*9c5db199SXin Li @timeout_m: Timeout (in minuts) before giving up. 236*9c5db199SXin Li @raises TimeoutException or UpstartServiceNotRunning if service isn't 237*9c5db199SXin Li running. 238*9c5db199SXin Li """ 239*9c5db199SXin Li @retry.retry(error.AutoservError, timeout_min=timeout_m, delay_sec=10) 240*9c5db199SXin Li def _verify(): 241*9c5db199SXin Li if not self.upstart_status(service): 242*9c5db199SXin Li raise UpstartServiceNotRunning(service) 243*9c5db199SXin Li _verify() 244*9c5db199SXin Li 245*9c5db199SXin Li def verify_moblab_services(self, timeout_m): 246*9c5db199SXin Li """Verify the required Moblab services are up and running. 247*9c5db199SXin Li 248*9c5db199SXin Li @param timeout_m: Timeout (in minutes) for how long to wait for services 249*9c5db199SXin Li to start. Actual time taken may be slightly more than this. 250*9c5db199SXin Li @raises AutoservError if any moblab service is not running. 251*9c5db199SXin Li """ 252*9c5db199SXin Li if not MOBLAB_SERVICES: 253*9c5db199SXin Li return 254*9c5db199SXin Li 255*9c5db199SXin Li service = MOBLAB_SERVICES[0] 256*9c5db199SXin Li try: 257*9c5db199SXin Li # First service can take a long time to start, especially on first 258*9c5db199SXin Li # boot where container setup can take 5-10 minutes, depending on the 259*9c5db199SXin Li # device. 260*9c5db199SXin Li self._verify_upstart_service(service, timeout_m) 261*9c5db199SXin Li except error.TimeoutException: 262*9c5db199SXin Li raise UpstartServiceNotRunning(service) 263*9c5db199SXin Li 264*9c5db199SXin Li for service in MOBLAB_SERVICES[1:]: 265*9c5db199SXin Li try: 266*9c5db199SXin Li # Follow up services should come up quickly. 267*9c5db199SXin Li self._verify_upstart_service(service, 0.5) 268*9c5db199SXin Li except error.TimeoutException: 269*9c5db199SXin Li raise UpstartServiceNotRunning(service) 270*9c5db199SXin Li 271*9c5db199SXin Li for process in MOBLAB_PROCESSES: 272*9c5db199SXin Li try: 273*9c5db199SXin Li self.run('pgrep %s' % process) 274*9c5db199SXin Li except error.AutoservRunError: 275*9c5db199SXin Li raise error.AutoservError('Moblab process: %s is not running.' 276*9c5db199SXin Li % process) 277*9c5db199SXin Li 278*9c5db199SXin Li 279*9c5db199SXin Li def _check_afe(self): 280*9c5db199SXin Li """Verify whether afe of moblab works before verifying its DUTs. 281*9c5db199SXin Li 282*9c5db199SXin Li Verifying moblab sometimes happens after a successful provision, in 283*9c5db199SXin Li which case moblab is restarted but tunnel of afe is not re-connected. 284*9c5db199SXin Li This func is used to check whether afe is working now. 285*9c5db199SXin Li 286*9c5db199SXin Li @return True if afe works. 287*9c5db199SXin Li @raises error.AutoservError if AFE is down; other exceptions are passed 288*9c5db199SXin Li through. 289*9c5db199SXin Li """ 290*9c5db199SXin Li try: 291*9c5db199SXin Li self.afe.get_hosts() 292*9c5db199SXin Li except (error.TimeoutException, timeout_util.TimeoutError) as e: 293*9c5db199SXin Li raise error.AutoservError('Moblab AFE is not responding: %s' % 294*9c5db199SXin Li str(e)) 295*9c5db199SXin Li except Exception as e: 296*9c5db199SXin Li logging.error('Unknown exception when checking moblab AFE: %s', e) 297*9c5db199SXin Li raise 298*9c5db199SXin Li 299*9c5db199SXin Li return True 300*9c5db199SXin Li 301*9c5db199SXin Li 302*9c5db199SXin Li def verify_duts(self): 303*9c5db199SXin Li """Verify the Moblab DUTs are up and running. 304*9c5db199SXin Li 305*9c5db199SXin Li @raises AutoservError if no DUTs are in the Ready State. 306*9c5db199SXin Li """ 307*9c5db199SXin Li hosts = self.afe.reverify_hosts() 308*9c5db199SXin Li logging.debug('DUTs scheduled for reverification: %s', hosts) 309*9c5db199SXin Li 310*9c5db199SXin Li 311*9c5db199SXin Li def verify_special_tasks_complete(self): 312*9c5db199SXin Li """Wait till the special tasks on the moblab host are complete.""" 313*9c5db199SXin Li total_time = 0 314*9c5db199SXin Li while (self.afe.get_special_tasks(is_complete=False) and 315*9c5db199SXin Li total_time < DUT_VERIFY_TIMEOUT): 316*9c5db199SXin Li total_time = total_time + DUT_VERIFY_SLEEP_SECS 317*9c5db199SXin Li time.sleep(DUT_VERIFY_SLEEP_SECS) 318*9c5db199SXin Li if not self.afe.get_hosts(status='Ready'): 319*9c5db199SXin Li for host in self.afe.get_hosts(): 320*9c5db199SXin Li logging.error('DUT: %s Status: %s', host, host.status) 321*9c5db199SXin Li raise error.AutoservError('Moblab has 0 Ready DUTs') 322*9c5db199SXin Li 323*9c5db199SXin Li 324*9c5db199SXin Li def get_platform(self): 325*9c5db199SXin Li """Determine the correct platform label for this host. 326*9c5db199SXin Li 327*9c5db199SXin Li For Moblab devices '_moblab' is appended. 328*9c5db199SXin Li 329*9c5db199SXin Li @returns a string representing this host's platform. 330*9c5db199SXin Li """ 331*9c5db199SXin Li return super(MoblabHost, self).get_platform() + '_moblab' 332*9c5db199SXin Li 333*9c5db199SXin Li 334*9c5db199SXin Li def make_tmp_dir(self, base=MOBLAB_TMP_DIR): 335*9c5db199SXin Li """Creates a temporary directory. 336*9c5db199SXin Li 337*9c5db199SXin Li @param base: The directory where it should be created. 338*9c5db199SXin Li 339*9c5db199SXin Li @return Path to a newly created temporary directory. 340*9c5db199SXin Li """ 341*9c5db199SXin Li self.run('mkdir -p %s' % base) 342*9c5db199SXin Li return self.run('mktemp -d -p %s' % base).stdout.strip() 343*9c5db199SXin Li 344*9c5db199SXin Li 345*9c5db199SXin Li def get_os_type(self): 346*9c5db199SXin Li return 'moblab' 347