xref: /aosp_15_r20/external/autotest/server/hosts/base_servohost.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li#
6*9c5db199SXin Li# Expects to be run in an environment with sudo and no interactive password
7*9c5db199SXin Li# prompt, such as within the Chromium OS development chroot.
8*9c5db199SXin Li
9*9c5db199SXin Li
10*9c5db199SXin Li"""This is a base host class for servohost and labstation."""
11*9c5db199SXin Li
12*9c5db199SXin Li
13*9c5db199SXin Liimport six.moves.http_client
14*9c5db199SXin Liimport logging
15*9c5db199SXin Liimport socket
16*9c5db199SXin Liimport six.moves.xmlrpc_client
17*9c5db199SXin Liimport time
18*9c5db199SXin Liimport os
19*9c5db199SXin Li
20*9c5db199SXin Litry:
21*9c5db199SXin Li    import docker
22*9c5db199SXin Li    from autotest_lib.site_utils.docker import utils as docker_utils
23*9c5db199SXin Liexcept ImportError:
24*9c5db199SXin Li    logging.info("Docker API is not installed in this environment")
25*9c5db199SXin Li
26*9c5db199SXin Lifrom autotest_lib.client.bin import utils
27*9c5db199SXin Lifrom autotest_lib.client.common_lib import autotest_enum
28*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
29*9c5db199SXin Lifrom autotest_lib.client.common_lib import hosts
30*9c5db199SXin Lifrom autotest_lib.client.common_lib import lsbrelease_utils
31*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import dev_server
32*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import kernel_utils
33*9c5db199SXin Lifrom autotest_lib.client.cros import constants as client_constants
34*9c5db199SXin Lifrom autotest_lib.server import autotest
35*9c5db199SXin Lifrom autotest_lib.server import site_utils as server_utils
36*9c5db199SXin Lifrom autotest_lib.server.cros import provisioner
37*9c5db199SXin Lifrom autotest_lib.server.hosts import ssh_host
38*9c5db199SXin Lifrom autotest_lib.site_utils.rpm_control_system import rpm_client
39*9c5db199SXin Li
40*9c5db199SXin Li
41*9c5db199SXin Liclass BaseServoHost(ssh_host.SSHHost):
42*9c5db199SXin Li    """Base host class for a host that manage servo(s).
43*9c5db199SXin Li     E.g. beaglebone, labstation.
44*9c5db199SXin Li    """
45*9c5db199SXin Li    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'
46*9c5db199SXin Li
47*9c5db199SXin Li    TEMP_FILE_DIR = '/var/lib/servod/'
48*9c5db199SXin Li
49*9c5db199SXin Li    LOCK_FILE_POSTFIX = '_in_use'
50*9c5db199SXin Li    REBOOT_FILE_POSTFIX = '_reboot'
51*9c5db199SXin Li
52*9c5db199SXin Li    # Time to wait a rebooting servohost, in seconds.
53*9c5db199SXin Li    REBOOT_TIMEOUT = 240
54*9c5db199SXin Li
55*9c5db199SXin Li    # Timeout value to power cycle a servohost, in seconds.
56*9c5db199SXin Li    BOOT_TIMEOUT = 240
57*9c5db199SXin Li
58*9c5db199SXin Li    # Constants that reflect current host update state.
59*9c5db199SXin Li    UPDATE_STATE = autotest_enum.AutotestEnum('IDLE', 'RUNNING',
60*9c5db199SXin Li                                              'PENDING_REBOOT')
61*9c5db199SXin Li
62*9c5db199SXin Li    def _initialize(self,
63*9c5db199SXin Li                    hostname,
64*9c5db199SXin Li                    is_in_lab=None,
65*9c5db199SXin Li                    servo_host_ssh_port=None,
66*9c5db199SXin Li                    servod_docker=None,
67*9c5db199SXin Li                    *args,
68*9c5db199SXin Li                    **dargs):
69*9c5db199SXin Li        """Construct a BaseServoHost object.
70*9c5db199SXin Li
71*9c5db199SXin Li        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
72*9c5db199SXin Li                          to None, for which utils.host_is_in_lab_zone will be
73*9c5db199SXin Li                          called to check if the servo host is in Cros lab.
74*9c5db199SXin Li
75*9c5db199SXin Li        """
76*9c5db199SXin Li        if servo_host_ssh_port is not None:
77*9c5db199SXin Li            dargs['port'] = int(servo_host_ssh_port)
78*9c5db199SXin Li
79*9c5db199SXin Li        super(BaseServoHost, self)._initialize(hostname=hostname,
80*9c5db199SXin Li                                               *args, **dargs)
81*9c5db199SXin Li
82*9c5db199SXin Li        self.servod_container_name = None
83*9c5db199SXin Li        self._is_containerized_servod = False
84*9c5db199SXin Li        if bool(servod_docker):
85*9c5db199SXin Li            self._is_containerized_servod = True
86*9c5db199SXin Li            self.servod_container_name = servod_docker
87*9c5db199SXin Li        elif self.hostname.endswith('docker_servod'):
88*9c5db199SXin Li            # For backward compatibility
89*9c5db199SXin Li            self.servod_container_name = self.hostname
90*9c5db199SXin Li            self._is_containerized_servod = True
91*9c5db199SXin Li
92*9c5db199SXin Li        self._is_localhost = (self.hostname == 'localhost'
93*9c5db199SXin Li                              and servo_host_ssh_port is None)
94*9c5db199SXin Li        if self._is_localhost or self._is_containerized_servod:
95*9c5db199SXin Li            self._is_in_lab = False
96*9c5db199SXin Li        elif is_in_lab is None:
97*9c5db199SXin Li            self._is_in_lab = (utils.host_is_in_lab_zone(self.hostname)
98*9c5db199SXin Li                               or self.is_satlab())
99*9c5db199SXin Li        else:
100*9c5db199SXin Li            self._is_in_lab = is_in_lab
101*9c5db199SXin Li
102*9c5db199SXin Li        # Commands on the servo host must be run by the superuser.
103*9c5db199SXin Li        # Our account on a remote host is root, but if our target is
104*9c5db199SXin Li        # localhost then we might be running unprivileged.  If so,
105*9c5db199SXin Li        # `sudo` will have to be added to the commands.
106*9c5db199SXin Li        if self._is_localhost:
107*9c5db199SXin Li            self._sudo_required = utils.system_output('id -u') != '0'
108*9c5db199SXin Li        else:
109*9c5db199SXin Li            self._sudo_required = False
110*9c5db199SXin Li
111*9c5db199SXin Li        self._is_labstation = None
112*9c5db199SXin Li        self._dut_host_info = None
113*9c5db199SXin Li        self._dut_hostname = None
114*9c5db199SXin Li
115*9c5db199SXin Li
116*9c5db199SXin Li    def get_board(self):
117*9c5db199SXin Li        """Determine the board for this servo host. E.g. fizz-labstation
118*9c5db199SXin Li
119*9c5db199SXin Li        @returns a string representing this labstation's board or None if
120*9c5db199SXin Li         target host is not using a ChromeOS image(e.g. test in chroot).
121*9c5db199SXin Li        """
122*9c5db199SXin Li        output = self.run('cat /etc/lsb-release', ignore_status=True).stdout
123*9c5db199SXin Li        return lsbrelease_utils.get_current_board(lsb_release_content=output)
124*9c5db199SXin Li
125*9c5db199SXin Li
126*9c5db199SXin Li    def set_dut_host_info(self, dut_host_info):
127*9c5db199SXin Li        """
128*9c5db199SXin Li        @param dut_host_info: A HostInfo object.
129*9c5db199SXin Li        """
130*9c5db199SXin Li        logging.info('setting dut_host_info field to (%s)', dut_host_info)
131*9c5db199SXin Li        self._dut_host_info = dut_host_info
132*9c5db199SXin Li
133*9c5db199SXin Li
134*9c5db199SXin Li    def get_dut_host_info(self):
135*9c5db199SXin Li        """
136*9c5db199SXin Li        @return A HostInfo object.
137*9c5db199SXin Li        """
138*9c5db199SXin Li        return self._dut_host_info
139*9c5db199SXin Li
140*9c5db199SXin Li
141*9c5db199SXin Li    def set_dut_hostname(self, dut_hostname):
142*9c5db199SXin Li        """
143*9c5db199SXin Li        @param dut_hostname: hostname of the DUT that connected to this servo.
144*9c5db199SXin Li        """
145*9c5db199SXin Li        logging.info('setting dut_hostname as (%s)', dut_hostname)
146*9c5db199SXin Li        self._dut_hostname = dut_hostname
147*9c5db199SXin Li
148*9c5db199SXin Li
149*9c5db199SXin Li    def get_dut_hostname(self):
150*9c5db199SXin Li        """
151*9c5db199SXin Li        @returns hostname of the DUT that connected to this servo.
152*9c5db199SXin Li        """
153*9c5db199SXin Li        return self._dut_hostname
154*9c5db199SXin Li
155*9c5db199SXin Li
156*9c5db199SXin Li    def is_labstation(self):
157*9c5db199SXin Li        """Determine if the host is a labstation
158*9c5db199SXin Li
159*9c5db199SXin Li        @returns True if ths host is a labstation otherwise False.
160*9c5db199SXin Li        """
161*9c5db199SXin Li        if self.is_containerized_servod():
162*9c5db199SXin Li            return False
163*9c5db199SXin Li
164*9c5db199SXin Li        if self._is_labstation is None:
165*9c5db199SXin Li            if 'labstation' in self.hostname:
166*9c5db199SXin Li                logging.info('Based on hostname, the servohost is'
167*9c5db199SXin Li                             ' a labstation.')
168*9c5db199SXin Li                self._is_labstation = True
169*9c5db199SXin Li            else:
170*9c5db199SXin Li                logging.info(
171*9c5db199SXin Li                        'Cannot determine if %s is a labstation from'
172*9c5db199SXin Li                        ' hostname, getting board info from the'
173*9c5db199SXin Li                        ' servohost.', self.hostname)
174*9c5db199SXin Li                board = self.get_board()
175*9c5db199SXin Li                self._is_labstation = bool(board) and 'labstation' in board
176*9c5db199SXin Li
177*9c5db199SXin Li        return self._is_labstation
178*9c5db199SXin Li
179*9c5db199SXin Li
180*9c5db199SXin Li    def _get_lsb_release_content(self):
181*9c5db199SXin Li        """Return the content of lsb-release file of host."""
182*9c5db199SXin Li        return self.run(
183*9c5db199SXin Li            'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
184*9c5db199SXin Li
185*9c5db199SXin Li
186*9c5db199SXin Li    def get_release_version(self):
187*9c5db199SXin Li        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
188*9c5db199SXin Li
189*9c5db199SXin Li        @returns The version string in lsb-release, under attribute
190*9c5db199SXin Li                 CHROMEOS_RELEASE_VERSION(e.g. 12900.0.0). None on fail.
191*9c5db199SXin Li        """
192*9c5db199SXin Li        return lsbrelease_utils.get_chromeos_release_version(
193*9c5db199SXin Li            lsb_release_content=self._get_lsb_release_content()
194*9c5db199SXin Li        )
195*9c5db199SXin Li
196*9c5db199SXin Li
197*9c5db199SXin Li    def get_full_release_path(self):
198*9c5db199SXin Li        """Get full release path from servohost as string.
199*9c5db199SXin Li
200*9c5db199SXin Li        @returns full release path as a string
201*9c5db199SXin Li                 (e.g. fizz-labstation-release/R82.12900.0.0). None on fail.
202*9c5db199SXin Li        """
203*9c5db199SXin Li        return lsbrelease_utils.get_chromeos_release_builder_path(
204*9c5db199SXin Li            lsb_release_content=self._get_lsb_release_content()
205*9c5db199SXin Li        )
206*9c5db199SXin Li
207*9c5db199SXin Li
208*9c5db199SXin Li    def _check_update_status(self):
209*9c5db199SXin Li        """ Check servohost's current update state.
210*9c5db199SXin Li
211*9c5db199SXin Li        @returns: one of below state of from self.UPDATE_STATE
212*9c5db199SXin Li            IDLE -- if the target host is not currently updating and not
213*9c5db199SXin Li                pending on a reboot.
214*9c5db199SXin Li            RUNNING -- if there is another updating process that running on
215*9c5db199SXin Li                target host(note: we don't expect to hit this scenario).
216*9c5db199SXin Li            PENDING_REBOOT -- if the target host had an update and pending
217*9c5db199SXin Li                on reboot.
218*9c5db199SXin Li        """
219*9c5db199SXin Li        result = self.run('pgrep -f quick-provision | grep -v $$',
220*9c5db199SXin Li                          ignore_status=True)
221*9c5db199SXin Li        # We don't expect any output unless there are another quick
222*9c5db199SXin Li        # provision process is running.
223*9c5db199SXin Li        if result.exit_status == 0:
224*9c5db199SXin Li            return self.UPDATE_STATE.RUNNING
225*9c5db199SXin Li
226*9c5db199SXin Li        # Determine if we have an update that pending on reboot by check if
227*9c5db199SXin Li        # the current inactive kernel has priority for the next boot.
228*9c5db199SXin Li        try:
229*9c5db199SXin Li            inactive_kernel = kernel_utils.get_kernel_state(self)[1]
230*9c5db199SXin Li            next_kernel = kernel_utils.get_next_kernel(self)
231*9c5db199SXin Li            if inactive_kernel == next_kernel:
232*9c5db199SXin Li                return self.UPDATE_STATE.PENDING_REBOOT
233*9c5db199SXin Li        except Exception as e:
234*9c5db199SXin Li            logging.error('Unexpected error while checking kernel info; %s', e)
235*9c5db199SXin Li        return self.UPDATE_STATE.IDLE
236*9c5db199SXin Li
237*9c5db199SXin Li
238*9c5db199SXin Li    def is_in_lab(self):
239*9c5db199SXin Li        """Check whether the servo host is a lab device.
240*9c5db199SXin Li
241*9c5db199SXin Li        @returns: True if the servo host is in Cros Lab, otherwise False.
242*9c5db199SXin Li
243*9c5db199SXin Li        """
244*9c5db199SXin Li        return self._is_in_lab
245*9c5db199SXin Li
246*9c5db199SXin Li
247*9c5db199SXin Li    def is_localhost(self):
248*9c5db199SXin Li        """Checks whether the servo host points to localhost.
249*9c5db199SXin Li
250*9c5db199SXin Li        @returns: True if it points to localhost, otherwise False.
251*9c5db199SXin Li
252*9c5db199SXin Li        """
253*9c5db199SXin Li        return self._is_localhost
254*9c5db199SXin Li
255*9c5db199SXin Li
256*9c5db199SXin Li    def is_containerized_servod(self):
257*9c5db199SXin Li        """Checks whether the servo host is a containerized servod.
258*9c5db199SXin Li
259*9c5db199SXin Li        @returns: True if using containerized servod, otherwise False.
260*9c5db199SXin Li
261*9c5db199SXin Li        """
262*9c5db199SXin Li        return self._is_containerized_servod
263*9c5db199SXin Li
264*9c5db199SXin Li    def is_cros_host(self):
265*9c5db199SXin Li        """Check if a servo host is running chromeos.
266*9c5db199SXin Li
267*9c5db199SXin Li        @return: True if the servo host is running chromeos.
268*9c5db199SXin Li            False if it isn't, or we don't have enough information.
269*9c5db199SXin Li        """
270*9c5db199SXin Li        if self.is_containerized_servod():
271*9c5db199SXin Li            return False
272*9c5db199SXin Li        try:
273*9c5db199SXin Li            result = self.run('grep -q CHROMEOS /etc/lsb-release',
274*9c5db199SXin Li                              ignore_status=True, timeout=10)
275*9c5db199SXin Li        except (error.AutoservRunError, error.AutoservSSHTimeout):
276*9c5db199SXin Li            return False
277*9c5db199SXin Li        return result.exit_status == 0
278*9c5db199SXin Li
279*9c5db199SXin Li
280*9c5db199SXin Li    def prepare_for_update(self):
281*9c5db199SXin Li        """Prepares the DUT for an update.
282*9c5db199SXin Li        Subclasses may override this to perform any special actions
283*9c5db199SXin Li        required before updating.
284*9c5db199SXin Li        """
285*9c5db199SXin Li        pass
286*9c5db199SXin Li
287*9c5db199SXin Li
288*9c5db199SXin Li    def reboot(self, *args, **dargs):
289*9c5db199SXin Li        """Reboot using special servo host reboot command."""
290*9c5db199SXin Li        super(BaseServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
291*9c5db199SXin Li                                          *args, **dargs)
292*9c5db199SXin Li
293*9c5db199SXin Li
294*9c5db199SXin Li    def update_image(self, stable_version=None):
295*9c5db199SXin Li        """Update the image on the servo host, if needed.
296*9c5db199SXin Li
297*9c5db199SXin Li        This method recognizes the following cases:
298*9c5db199SXin Li          * If the Host is not running ChromeOS, do nothing.
299*9c5db199SXin Li          * If a previously triggered update is now complete, reboot
300*9c5db199SXin Li            to the new version.
301*9c5db199SXin Li          * If the host is processing an update do nothing.
302*9c5db199SXin Li          * If the host has an update that pending on reboot, do nothing.
303*9c5db199SXin Li          * If the host is running a version of ChromeOS different
304*9c5db199SXin Li            from the default for servo Hosts, start an update.
305*9c5db199SXin Li
306*9c5db199SXin Li        @stable_version the target build number.(e.g. R82-12900.0.0)
307*9c5db199SXin Li
308*9c5db199SXin Li        @raises dev_server.DevServerException: If all the devservers are down.
309*9c5db199SXin Li        @raises site_utils.ParseBuildNameException: If the devserver returns
310*9c5db199SXin Li            an invalid build name.
311*9c5db199SXin Li        """
312*9c5db199SXin Li        # servod could be running in a Ubuntu workstation.
313*9c5db199SXin Li        if not self.is_cros_host():
314*9c5db199SXin Li            logging.info('Not attempting an update, either %s is not running '
315*9c5db199SXin Li                         'chromeos or we cannot find enough information about '
316*9c5db199SXin Li                         'the host.', self.hostname)
317*9c5db199SXin Li            return
318*9c5db199SXin Li
319*9c5db199SXin Li        if lsbrelease_utils.is_moblab():
320*9c5db199SXin Li            logging.info('Not attempting an update, %s is running moblab.',
321*9c5db199SXin Li                         self.hostname)
322*9c5db199SXin Li            return
323*9c5db199SXin Li
324*9c5db199SXin Li        if not stable_version:
325*9c5db199SXin Li            logging.debug("BaseServoHost::update_image attempting to get"
326*9c5db199SXin Li                          " servo cros stable version")
327*9c5db199SXin Li            try:
328*9c5db199SXin Li                stable_version = (self.get_dut_host_info().
329*9c5db199SXin Li                                  servo_cros_stable_version)
330*9c5db199SXin Li            except AttributeError:
331*9c5db199SXin Li                logging.error("BaseServoHost::update_image failed to get"
332*9c5db199SXin Li                              " servo cros stable version.")
333*9c5db199SXin Li
334*9c5db199SXin Li        target_build = "%s-release/%s" % (self.get_board(), stable_version)
335*9c5db199SXin Li        target_build_number = server_utils.ParseBuildName(
336*9c5db199SXin Li            target_build)[3]
337*9c5db199SXin Li        current_build_number = self.get_release_version()
338*9c5db199SXin Li
339*9c5db199SXin Li        if current_build_number == target_build_number:
340*9c5db199SXin Li            logging.info('servo host %s does not require an update.',
341*9c5db199SXin Li                         self.hostname)
342*9c5db199SXin Li            return
343*9c5db199SXin Li
344*9c5db199SXin Li        status = self._check_update_status()
345*9c5db199SXin Li        if status == self.UPDATE_STATE.RUNNING:
346*9c5db199SXin Li            logging.info('servo host %s already processing an update',
347*9c5db199SXin Li                         self.hostname)
348*9c5db199SXin Li            return
349*9c5db199SXin Li        if status == self.UPDATE_STATE.PENDING_REBOOT:
350*9c5db199SXin Li            # Labstation reboot is handled separately here as it require
351*9c5db199SXin Li            # synchronized reboot among all managed DUTs. For servo_v3, we'll
352*9c5db199SXin Li            # reboot when initialize Servohost, if there is a update pending.
353*9c5db199SXin Li            logging.info('An update has been completed and pending reboot.')
354*9c5db199SXin Li            return
355*9c5db199SXin Li
356*9c5db199SXin Li        ds = dev_server.ImageServer.resolve(self.hostname,
357*9c5db199SXin Li                                            hostname=self.hostname)
358*9c5db199SXin Li        url = ds.get_update_url(target_build)
359*9c5db199SXin Li        cros_provisioner = provisioner.ChromiumOSProvisioner(update_url=url,
360*9c5db199SXin Li                                                             host=self,
361*9c5db199SXin Li                                                             is_servohost=True)
362*9c5db199SXin Li        logging.info('Using devserver url: %s to trigger update on '
363*9c5db199SXin Li                     'servo host %s, from %s to %s', url, self.hostname,
364*9c5db199SXin Li                     current_build_number, target_build_number)
365*9c5db199SXin Li        cros_provisioner.run_provision()
366*9c5db199SXin Li
367*9c5db199SXin Li
368*9c5db199SXin Li    def has_power(self):
369*9c5db199SXin Li        """Return whether or not the servo host is powered by PoE or RPM."""
370*9c5db199SXin Li        # TODO(fdeng): See crbug.com/302791
371*9c5db199SXin Li        # For now, assume all servo hosts in the lab have power.
372*9c5db199SXin Li        return self.is_in_lab()
373*9c5db199SXin Li
374*9c5db199SXin Li
375*9c5db199SXin Li    def _post_update_reboot(self):
376*9c5db199SXin Li        """ Reboot servohost after an quick provision.
377*9c5db199SXin Li
378*9c5db199SXin Li        We need to do some specifal cleanup before and after reboot
379*9c5db199SXin Li        when there is an update pending.
380*9c5db199SXin Li        """
381*9c5db199SXin Li        # Regarding the 'crossystem' command below: In some cases,
382*9c5db199SXin Li        # the update flow puts the TPM into a state such that it
383*9c5db199SXin Li        # fails verification.  We don't know why.  However, this
384*9c5db199SXin Li        # call papers over the problem by clearing the TPM during
385*9c5db199SXin Li        # the reboot.
386*9c5db199SXin Li        #
387*9c5db199SXin Li        # We ignore failures from 'crossystem'.  Although failure
388*9c5db199SXin Li        # here is unexpected, and could signal a bug, the point of
389*9c5db199SXin Li        # the exercise is to paper over problems; allowing this to
390*9c5db199SXin Li        # fail would defeat the purpose.
391*9c5db199SXin Li
392*9c5db199SXin Li        # Preserve critical files before reboot since post-provision
393*9c5db199SXin Li        # clobbering will wipe the stateful partition.
394*9c5db199SXin Li        # TODO(xianuowang@) Remove this logic once we have updated to
395*9c5db199SXin Li        # a image with https://crrev.com/c/2485908.
396*9c5db199SXin Li        path_to_preserve = [
397*9c5db199SXin Li                '/var/lib/servod',
398*9c5db199SXin Li                '/var/lib/device_health_profile',
399*9c5db199SXin Li        ]
400*9c5db199SXin Li        safe_location = '/mnt/stateful_partition/unencrypted/preserve/'
401*9c5db199SXin Li        for item in path_to_preserve:
402*9c5db199SXin Li            dest = os.path.join(safe_location, item.split('/')[-1])
403*9c5db199SXin Li            self.run('rm -rf %s' % dest, ignore_status=True)
404*9c5db199SXin Li            self.run('mv %s %s' % (item, safe_location), ignore_status=True)
405*9c5db199SXin Li
406*9c5db199SXin Li        self.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
407*9c5db199SXin Li        self._servo_host_reboot()
408*9c5db199SXin Li        logging.debug('Cleaning up autotest directories if exist.')
409*9c5db199SXin Li        try:
410*9c5db199SXin Li            installed_autodir = autotest.Autotest.get_installed_autodir(self)
411*9c5db199SXin Li            self.run('rm -rf ' + installed_autodir)
412*9c5db199SXin Li        except autotest.AutodirNotFoundError:
413*9c5db199SXin Li            logging.debug('No autotest installed directory found.')
414*9c5db199SXin Li
415*9c5db199SXin Li        # Recover preserved files to original location.
416*9c5db199SXin Li        # TODO(xianuowang@) Remove this logic once we have updated to
417*9c5db199SXin Li        # a image with https://crrev.com/c/2485908.
418*9c5db199SXin Li        for item in path_to_preserve:
419*9c5db199SXin Li            src = os.path.join(safe_location, item.split('/')[-1])
420*9c5db199SXin Li            dest = '/'.join(item.split('/')[:-1])
421*9c5db199SXin Li            self.run('mv %s %s' % (src, dest), ignore_status=True)
422*9c5db199SXin Li
423*9c5db199SXin Li    def power_cycle(self):
424*9c5db199SXin Li        """Cycle power to this host via PoE(servo v3) or RPM(labstation)
425*9c5db199SXin Li        if it is a lab device.
426*9c5db199SXin Li
427*9c5db199SXin Li        @raises AutoservRepairError if it fails to power cycle the
428*9c5db199SXin Li                servo host.
429*9c5db199SXin Li
430*9c5db199SXin Li        """
431*9c5db199SXin Li        if self.has_power():
432*9c5db199SXin Li            try:
433*9c5db199SXin Li                rpm_client.set_power(self, 'CYCLE')
434*9c5db199SXin Li            except (socket.error, six.moves.xmlrpc_client.Error,
435*9c5db199SXin Li                    six.moves.http_client.BadStatusLine,
436*9c5db199SXin Li                    rpm_client.RemotePowerException) as e:
437*9c5db199SXin Li                raise hosts.AutoservRepairError(
438*9c5db199SXin Li                    'Power cycling %s failed: %s' % (self.hostname, e),
439*9c5db199SXin Li                    'power_cycle_via_rpm_failed'
440*9c5db199SXin Li                )
441*9c5db199SXin Li        else:
442*9c5db199SXin Li            logging.info('Skipping power cycling, not a lab device.')
443*9c5db199SXin Li
444*9c5db199SXin Li
445*9c5db199SXin Li    def _servo_host_reboot(self):
446*9c5db199SXin Li        """Reboot this servo host because a reboot is requested."""
447*9c5db199SXin Li        try:
448*9c5db199SXin Li            # TODO(otabek) remove if found the fix for b/174514811
449*9c5db199SXin Li            # The default factory firmware remember the latest chromeboxes
450*9c5db199SXin Li            # status after power off. If box was in sleep mode before the
451*9c5db199SXin Li            # break, the box will stay at sleep mode after power on.
452*9c5db199SXin Li            # Disable power manager has make chromebox to boot always when
453*9c5db199SXin Li            # we deliver the power to the device.
454*9c5db199SXin Li            logging.info('Stoping powerd service on device')
455*9c5db199SXin Li            self.run('stop powerd', ignore_status=True, timeout=30)
456*9c5db199SXin Li        except Exception as e:
457*9c5db199SXin Li            logging.debug('(Not critical) Fail to stop powerd; %s', e)
458*9c5db199SXin Li
459*9c5db199SXin Li        logging.info('Rebooting servo host %s from build %s', self.hostname,
460*9c5db199SXin Li                     self.get_release_version())
461*9c5db199SXin Li        # Tell the reboot() call not to wait for completion.
462*9c5db199SXin Li        # Otherwise, the call will log reboot failure if servo does
463*9c5db199SXin Li        # not come back.  The logged reboot failure will lead to
464*9c5db199SXin Li        # test job failure.  If the test does not require servo, we
465*9c5db199SXin Li        # don't want servo failure to fail the test with error:
466*9c5db199SXin Li        # `Host did not return from reboot` in status.log.
467*9c5db199SXin Li        self.reboot(fastsync=True, wait=False)
468*9c5db199SXin Li
469*9c5db199SXin Li        # We told the reboot() call not to wait, but we need to wait
470*9c5db199SXin Li        # for the reboot before we continue.  Alas.  The code from
471*9c5db199SXin Li        # here below is basically a copy of Host.wait_for_restart(),
472*9c5db199SXin Li        # with the logging bits ripped out, so that they can't cause
473*9c5db199SXin Li        # the failure logging problem described above.
474*9c5db199SXin Li        #
475*9c5db199SXin Li        # The stain that this has left on my soul can never be
476*9c5db199SXin Li        # erased.
477*9c5db199SXin Li        old_boot_id = self.get_boot_id()
478*9c5db199SXin Li        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
479*9c5db199SXin Li                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
480*9c5db199SXin Li                              old_boot_id=old_boot_id):
481*9c5db199SXin Li            raise error.AutoservHostError(
482*9c5db199SXin Li                'servo host %s failed to shut down.' %
483*9c5db199SXin Li                self.hostname)
484*9c5db199SXin Li        if self.wait_up(timeout=self.REBOOT_TIMEOUT):
485*9c5db199SXin Li            logging.info('servo host %s back from reboot, with build %s',
486*9c5db199SXin Li                         self.hostname, self.get_release_version())
487*9c5db199SXin Li        else:
488*9c5db199SXin Li            raise error.AutoservHostError(
489*9c5db199SXin Li                'servo host %s failed to come back from reboot.' %
490*9c5db199SXin Li                self.hostname)
491*9c5db199SXin Li
492*9c5db199SXin Li
493*9c5db199SXin Li    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
494*9c5db199SXin Li        connect_timeout=None, alive_interval=None, alive_count_max=None,
495*9c5db199SXin Li        connection_attempts=None):
496*9c5db199SXin Li        """Override default make_ssh_command to use tuned options.
497*9c5db199SXin Li
498*9c5db199SXin Li        Tuning changes:
499*9c5db199SXin Li          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
500*9c5db199SXin Li          connection failure. Consistency with remote_access.py.
501*9c5db199SXin Li
502*9c5db199SXin Li          - ServerAliveInterval=180; which causes SSH to ping connection every
503*9c5db199SXin Li          180 seconds. In conjunction with ServerAliveCountMax ensures
504*9c5db199SXin Li          that if the connection dies, Autotest will bail out quickly.
505*9c5db199SXin Li
506*9c5db199SXin Li          - ServerAliveCountMax=3; consistency with remote_access.py.
507*9c5db199SXin Li
508*9c5db199SXin Li          - ConnectAttempts=4; reduce flakiness in connection errors;
509*9c5db199SXin Li          consistency with remote_access.py.
510*9c5db199SXin Li
511*9c5db199SXin Li          - UserKnownHostsFile=/dev/null; we don't care about the keys.
512*9c5db199SXin Li
513*9c5db199SXin Li          - SSH protocol forced to 2; needed for ServerAliveInterval.
514*9c5db199SXin Li
515*9c5db199SXin Li        @param user User name to use for the ssh connection.
516*9c5db199SXin Li        @param port Port on the target host to use for ssh connection.
517*9c5db199SXin Li        @param opts Additional options to the ssh command.
518*9c5db199SXin Li        @param hosts_file Ignored.
519*9c5db199SXin Li        @param connect_timeout Ignored.
520*9c5db199SXin Li        @param alive_interval Ignored.
521*9c5db199SXin Li        @param alive_count_max Ignored.
522*9c5db199SXin Li        @param connection_attempts Ignored.
523*9c5db199SXin Li
524*9c5db199SXin Li        @returns: An ssh command with the requested settings.
525*9c5db199SXin Li
526*9c5db199SXin Li        """
527*9c5db199SXin Li        options = ' '.join([opts, '-o Protocol=2'])
528*9c5db199SXin Li        return super(BaseServoHost, self).make_ssh_command(
529*9c5db199SXin Li            user=user, port=port, opts=options, hosts_file='/dev/null',
530*9c5db199SXin Li            connect_timeout=30, alive_interval=180, alive_count_max=3,
531*9c5db199SXin Li            connection_attempts=4)
532*9c5db199SXin Li
533*9c5db199SXin Li
534*9c5db199SXin Li    def _make_scp_cmd(self, sources, dest):
535*9c5db199SXin Li        """Format scp command.
536*9c5db199SXin Li
537*9c5db199SXin Li        Given a list of source paths and a destination path, produces the
538*9c5db199SXin Li        appropriate scp command for encoding it. Remote paths must be
539*9c5db199SXin Li        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
540*9c5db199SXin Li        to allow additional ssh options.
541*9c5db199SXin Li
542*9c5db199SXin Li        @param sources: A list of source paths to copy from.
543*9c5db199SXin Li        @param dest: Destination path to copy to.
544*9c5db199SXin Li
545*9c5db199SXin Li        @returns: An scp command that copies |sources| on local machine to
546*9c5db199SXin Li                  |dest| on the remote servo host.
547*9c5db199SXin Li
548*9c5db199SXin Li        """
549*9c5db199SXin Li        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
550*9c5db199SXin Li                   '-o UserKnownHostsFile=/dev/null %s %s "%s"')
551*9c5db199SXin Li        port = self.port
552*9c5db199SXin Li        if port is None:
553*9c5db199SXin Li            logging.info('BaseServoHost: defaulting to port 22. See b/204502754.')
554*9c5db199SXin Li            port = 22
555*9c5db199SXin Li        args = (
556*9c5db199SXin Li            self._main_ssh.ssh_option,
557*9c5db199SXin Li            ("-P %s" % port),
558*9c5db199SXin Li            sources,
559*9c5db199SXin Li            dest,
560*9c5db199SXin Li        )
561*9c5db199SXin Li        return command % args
562*9c5db199SXin Li
563*9c5db199SXin Li
564*9c5db199SXin Li    def run(self, command, timeout=3600, ignore_status=False,
565*9c5db199SXin Li        stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
566*9c5db199SXin Li        connect_timeout=30, ssh_failure_retry_ok=False,
567*9c5db199SXin Li        options='', stdin=None, verbose=True, args=()):
568*9c5db199SXin Li        """Run a command on the servo host.
569*9c5db199SXin Li
570*9c5db199SXin Li        Extends method `run` in SSHHost. If the servo host is a remote device,
571*9c5db199SXin Li        it will call `run` in SSHost without changing anything.
572*9c5db199SXin Li        If the servo host is 'localhost', it will call utils.system_output.
573*9c5db199SXin Li
574*9c5db199SXin Li        @param command: The command line string.
575*9c5db199SXin Li        @param timeout: Time limit in seconds before attempting to
576*9c5db199SXin Li                        kill the running process. The run() function
577*9c5db199SXin Li                        will take a few seconds longer than 'timeout'
578*9c5db199SXin Li                        to complete if it has to kill the process.
579*9c5db199SXin Li        @param ignore_status: Do not raise an exception, no matter
580*9c5db199SXin Li                              what the exit code of the command is.
581*9c5db199SXin Li        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
582*9c5db199SXin Li        @param connect_timeout: SSH connection timeout (in seconds)
583*9c5db199SXin Li                                Ignored if host is 'localhost'.
584*9c5db199SXin Li        @param options: String with additional ssh command options
585*9c5db199SXin Li                        Ignored if host is 'localhost'.
586*9c5db199SXin Li        @param ssh_failure_retry_ok: when True and ssh connection failure is
587*9c5db199SXin Li                                     suspected, OK to retry command (but not
588*9c5db199SXin Li                                     compulsory, and likely not needed here)
589*9c5db199SXin Li        @param stdin: Stdin to pass (a string) to the executed command.
590*9c5db199SXin Li        @param verbose: Log the commands.
591*9c5db199SXin Li        @param args: Sequence of strings to pass as arguments to command by
592*9c5db199SXin Li                     quoting them in " and escaping their contents if necessary.
593*9c5db199SXin Li
594*9c5db199SXin Li        @returns: A utils.CmdResult object.
595*9c5db199SXin Li
596*9c5db199SXin Li        @raises AutoservRunError if the command failed.
597*9c5db199SXin Li        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
598*9c5db199SXin Li                when servo host is not 'localhost'.
599*9c5db199SXin Li
600*9c5db199SXin Li        """
601*9c5db199SXin Li        run_args = {
602*9c5db199SXin Li            'command'             : command,
603*9c5db199SXin Li            'timeout'             : timeout,
604*9c5db199SXin Li            'ignore_status'       : ignore_status,
605*9c5db199SXin Li            'stdout_tee'          : stdout_tee,
606*9c5db199SXin Li            'stderr_tee'          : stderr_tee,
607*9c5db199SXin Li            # connect_timeout     n/a for localhost
608*9c5db199SXin Li            # options             n/a for localhost
609*9c5db199SXin Li            # ssh_failure_retry_ok n/a for localhost
610*9c5db199SXin Li            'stdin'               : stdin,
611*9c5db199SXin Li            'verbose'             : verbose,
612*9c5db199SXin Li            'args'                : args,
613*9c5db199SXin Li        }
614*9c5db199SXin Li        if self.is_containerized_servod():
615*9c5db199SXin Li            logging.debug("Trying to run the command %s", command)
616*9c5db199SXin Li            client = docker_utils.get_docker_client(timeout=timeout)
617*9c5db199SXin Li            container = client.containers.get(self.servod_container_name)
618*9c5db199SXin Li            try:
619*9c5db199SXin Li                (exit_code,
620*9c5db199SXin Li                 output) = container.exec_run("bash -c '%s'" % command)
621*9c5db199SXin Li                # b/217780680, Make this compatible with python3,
622*9c5db199SXin Li                if isinstance(output, bytes):
623*9c5db199SXin Li                    output = output.decode(errors='replace')
624*9c5db199SXin Li            except docker.errors.APIError:
625*9c5db199SXin Li                logging.exception("Failed to run command %s", command)
626*9c5db199SXin Li                for line in container.logs().split(b'\n'):
627*9c5db199SXin Li                    logging.error(line)
628*9c5db199SXin Li                return utils.CmdResult(command=command,
629*9c5db199SXin Li                                       stdout="",
630*9c5db199SXin Li                                       exit_status=-1)
631*9c5db199SXin Li            return utils.CmdResult(command=command,
632*9c5db199SXin Li                                   stdout=output,
633*9c5db199SXin Li                                   exit_status=exit_code)
634*9c5db199SXin Li        elif self.is_localhost():
635*9c5db199SXin Li            if self._sudo_required:
636*9c5db199SXin Li                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
637*9c5db199SXin Li                        command)
638*9c5db199SXin Li            try:
639*9c5db199SXin Li                return utils.run(**run_args)
640*9c5db199SXin Li            except error.CmdError as e:
641*9c5db199SXin Li                logging.error(e)
642*9c5db199SXin Li                raise error.AutoservRunError('command execution error',
643*9c5db199SXin Li                                             e.result_obj)
644*9c5db199SXin Li        else:
645*9c5db199SXin Li            run_args['connect_timeout'] = connect_timeout
646*9c5db199SXin Li            run_args['options'] = options
647*9c5db199SXin Li            run_args['ssh_failure_retry_ok'] = ssh_failure_retry_ok
648*9c5db199SXin Li            return super(BaseServoHost, self).run(**run_args)
649*9c5db199SXin Li
650*9c5db199SXin Li    def _mount_drive(self, src_path, dst_path):
651*9c5db199SXin Li        """Mount an external drive on servohost.
652*9c5db199SXin Li
653*9c5db199SXin Li        @param: src_path  the drive path to mount(e.g. /dev/sda3).
654*9c5db199SXin Li        @param: dst_path  the destination directory on servohost to mount
655*9c5db199SXin Li                          the drive.
656*9c5db199SXin Li
657*9c5db199SXin Li        @returns: True if mount success otherwise False.
658*9c5db199SXin Li        """
659*9c5db199SXin Li        # Make sure the dst dir exists.
660*9c5db199SXin Li        self.run('mkdir -p %s' % dst_path)
661*9c5db199SXin Li
662*9c5db199SXin Li        result = self.run('mount -o ro %s %s' % (src_path, dst_path),
663*9c5db199SXin Li                          ignore_status=True)
664*9c5db199SXin Li        return result.exit_status == 0
665*9c5db199SXin Li
666*9c5db199SXin Li    def _unmount_drive(self, mount_path):
667*9c5db199SXin Li        """Unmount a drive from servohost.
668*9c5db199SXin Li
669*9c5db199SXin Li        @param: mount_path  the path on servohost to unmount.
670*9c5db199SXin Li
671*9c5db199SXin Li        @returns: True if unmount success otherwise False.
672*9c5db199SXin Li        """
673*9c5db199SXin Li        result = self.run('umount %s' % mount_path, ignore_status=True)
674*9c5db199SXin Li        return result.exit_status == 0
675*9c5db199SXin Li
676*9c5db199SXin Li    def wait_ready(self, required_uptime=300):
677*9c5db199SXin Li        """Wait ready for a servohost if it has been rebooted recently.
678*9c5db199SXin Li
679*9c5db199SXin Li        It may take a few minutes until all servos and their componments
680*9c5db199SXin Li        re-enumerated and become ready after a servohost(especially labstation
681*9c5db199SXin Li        as it supports multiple servos) reboot, so we need to make sure the
682*9c5db199SXin Li        servohost has been up for a given a mount of time before trying to
683*9c5db199SXin Li        start any actions.
684*9c5db199SXin Li
685*9c5db199SXin Li        @param required_uptime: Minimum uptime in seconds that we can
686*9c5db199SXin Li                                consdier a servohost be ready.
687*9c5db199SXin Li        """
688*9c5db199SXin Li        uptime = float(self.check_uptime())
689*9c5db199SXin Li        # To prevent unexpected output from check_uptime() that causes long
690*9c5db199SXin Li        # sleep, make sure the maximum wait time <= required_uptime.
691*9c5db199SXin Li        diff = min(required_uptime - uptime, required_uptime)
692*9c5db199SXin Li        if diff > 0:
693*9c5db199SXin Li            logging.info(
694*9c5db199SXin Li                    'The servohost was just rebooted, wait %s'
695*9c5db199SXin Li                    ' seconds for it to become ready', diff)
696*9c5db199SXin Li            time.sleep(diff)
697*9c5db199SXin Li
698*9c5db199SXin Li    def is_up(self,
699*9c5db199SXin Li              timeout=60,
700*9c5db199SXin Li              connect_timeout=None,
701*9c5db199SXin Li              base_cmd="true",
702*9c5db199SXin Li              with_servod=True):
703*9c5db199SXin Li        """
704*9c5db199SXin Li        Check if the remote host is up by ssh-ing and running a base command.
705*9c5db199SXin Li
706*9c5db199SXin Li        @param timeout: command execution timeout in seconds.
707*9c5db199SXin Li        @param connect_timeout: ssh connection timeout in seconds.
708*9c5db199SXin Li        @param base_cmd: a base command to run with ssh. The default is 'true'.
709*9c5db199SXin Li        @returns True if the remote host is up before the timeout expires,
710*9c5db199SXin Li                 False otherwise.
711*9c5db199SXin Li        """
712*9c5db199SXin Li        if self.is_containerized_servod():
713*9c5db199SXin Li            client = docker_utils.get_docker_client(timeout=timeout)
714*9c5db199SXin Li            # Look up the container list with hostname and with/without servod process by label.
715*9c5db199SXin Li            containers = client.containers.list(
716*9c5db199SXin Li                    filters={
717*9c5db199SXin Li                            'name': self.hostname,
718*9c5db199SXin Li                            'label': ["WITH_SERVOD=%s" % str(with_servod)]
719*9c5db199SXin Li                    })
720*9c5db199SXin Li            if not containers:
721*9c5db199SXin Li                return False
722*9c5db199SXin Li            elif with_servod:
723*9c5db199SXin Li                # For container with servod process, check if servod process started.
724*9c5db199SXin Li                (exit_code, output) = containers[0].exec_run("ps")
725*9c5db199SXin Li                logging.info("Is Up output %s", output)
726*9c5db199SXin Li                if b"servod" not in output:
727*9c5db199SXin Li                    return False
728*9c5db199SXin Li            return True
729*9c5db199SXin Li        else:
730*9c5db199SXin Li            return super(BaseServoHost, self).is_up(timeout, connect_timeout,
731*9c5db199SXin Li                                                    base_cmd)
732