xref: /aosp_15_r20/external/autotest/server/hosts/remote.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2"""This class defines the Remote host class."""
3
4from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
10import re
11
12import common
13
14from autotest_lib.client.common_lib import error
15from autotest_lib.client.common_lib.global_config import global_config
16from autotest_lib.server import utils
17from autotest_lib.server.hosts import base_classes
18from autotest_lib.server.hosts.tls_client.connection import TLSConnection
19
20
21class RemoteHost(base_classes.Host):
22    """
23    This class represents a remote machine on which you can run
24    programs.
25
26    It may be accessed through a network, a serial line, ...
27    It is not the machine autoserv is running on.
28
29    Implementation details:
30    This is an abstract class, leaf subclasses must implement the methods
31    listed here and in parent classes which have no implementation. They
32    may reimplement methods which already have an implementation. You
33    must not instantiate this class but should instantiate one of those
34    leaf subclasses.
35    """
36
37    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
38    DEFAULT_HALT_TIMEOUT = 2 * 60
39    _LABEL_FUNCTIONS = []
40    _DETECTABLE_LABELS = []
41
42    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
43    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'
44
45
46    def _initialize(self, hostname, autodir=None, *args, **dargs):
47        super(RemoteHost, self)._initialize(*args, **dargs)
48
49        self.hostname = hostname
50        self.autodir = autodir
51        self.tmp_dirs = []
52
53        get_value = global_config.get_config_value
54
55        self.tls_connection = None
56        try:
57            self.tls_connection = TLSConnection()
58        except Exception as e:
59            logging.warning("Could not establish TLS connection %s", e)
60
61    def __repr__(self):
62        return "<remote host: %s>" % self.hostname
63
64
65    def close(self):
66        # pylint: disable=missing-docstring
67        super(RemoteHost, self).close()
68        self.stop_loggers()
69
70        if hasattr(self, 'tmp_dirs'):
71            for dir in self.tmp_dirs:
72                try:
73                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
74                except error.AutoservRunError:
75                    pass
76        if self.tls_connection:
77            self.tls_connection.close()
78            self.tls_connection = None
79
80    def job_start(self):
81        """
82        Abstract method, called the first time a remote host object
83        is created for a specific host after a job starts.
84
85        This method depends on the create_host factory being used to
86        construct your host object. If you directly construct host objects
87        you will need to call this method yourself (and enforce the
88        single-call rule).
89        """
90        try:
91            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
92                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
93            self.run(cmd)
94        except Exception as e:
95            # Non-fatal error
96            logging.info('Failed to copy /var/log/messages at startup: %s', e)
97
98
99    def get_autodir(self):
100        return self.autodir
101
102
103    def set_autodir(self, autodir):
104        """
105        This method is called to make the host object aware of the
106        where autotest is installed. Called in server/autotest.py
107        after a successful install
108        """
109        self.autodir = autodir
110
111
112    def sysrq_reboot(self):
113        # pylint: disable=missing-docstring
114        self.run_background('echo b > /proc/sysrq-trigger')
115
116
117    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
118        """
119        Shut down the remote host.
120
121        N.B.  This method makes no provision to bring the target back
122        up.  The target will be offline indefinitely if there's no
123        independent hardware (servo, RPM, etc.) to force the target to
124        power on.
125
126        @param timeout  Maximum time to wait for host down, in seconds.
127        @param wait  Whether to wait for the host to go offline.
128        """
129        self.run_background('sleep 1 ; halt')
130        if wait:
131            self.wait_down(timeout=timeout)
132
133
134    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
135               fastsync=False, reboot_cmd=None, **dargs):
136        """
137        Reboot the remote host.
138
139        Args:
140                timeout - How long to wait for the reboot.
141                wait - Should we wait to see if the machine comes back up.
142                       If this is set to True, ignores reboot_cmd's error
143                       even if occurs.
144                fastsync - Don't wait for the sync to complete, just start one
145                        and move on. This is for cases where rebooting prompty
146                        is more important than data integrity and/or the
147                        machine may have disks that cause sync to never return.
148                reboot_cmd - Reboot command to execute.
149        """
150        self.reboot_setup(**dargs)
151        if not reboot_cmd:
152            reboot_cmd = ('sync & sleep 5; '
153                          'reboot & sleep 60; '
154                          'reboot -f & sleep 10; '
155                          'reboot -nf & sleep 10; '
156                          'telinit 6')
157
158        def reboot():
159            # pylint: disable=missing-docstring
160            self.record("GOOD", None, "reboot.start")
161            current_boot_id = None
162            try:
163                current_boot_id = self.get_boot_id()
164
165                # sync before starting the reboot, so that a long sync during
166                # shutdown isn't timed out by wait_down's short timeout
167                if not fastsync:
168                    self.run('sync; sync', timeout=timeout, ignore_status=True)
169
170                self.run_background(reboot_cmd)
171            except error.AutoservRunError:
172                # If wait is set, ignore the error here, and rely on the
173                # wait_for_restart() for stability, instead.
174                # reboot_cmd sometimes causes an error even if reboot is
175                # successfully in progress. This is difficult to be avoided,
176                # because we have no much control on remote machine after
177                # "reboot" starts.
178                if not wait or current_boot_id is None:
179                    # TODO(b/37652392): Revisit no-wait case, later.
180                    self.record("ABORT", None, "reboot.start",
181                                "reboot command failed")
182                    raise
183            if wait:
184                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
185                                      **dargs)
186
187        # if this is a full reboot-and-wait, run the reboot inside a group
188        if wait:
189            self.log_op(self.OP_REBOOT, reboot)
190        else:
191            reboot()
192
193    def suspend(self, timeout, suspend_cmd,
194                allow_early_resume=False):
195        """
196        Suspend the remote host.
197
198        Args:
199                timeout - How long to wait for the suspend in integer seconds.
200                suspend_cmd - suspend command to execute.
201                allow_early_resume - Boolean that indicate whether resume
202                                     before |timeout| is ok.
203        Raises:
204                error.AutoservSuspendError - If |allow_early_resume| is False
205                                             and if device resumes before
206                                             |timeout|.
207        """
208        # define a function for the supend and run it in a group
209        def suspend():
210            # pylint: disable=missing-docstring
211            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
212            try:
213                self.run_background(suspend_cmd)
214            except error.AutoservRunError:
215                self.record("ABORT", None, "suspend.start",
216                            "suspend command failed")
217                raise error.AutoservSuspendError("suspend command failed")
218
219            # Wait for some time, to ensure the machine is going to sleep.
220            # Not too long to check if the machine really suspended.
221            time_slice = min(timeout / 2, 300)
222            time.sleep(time_slice)
223            time_counter = time_slice
224            while time_counter < timeout + 60:
225                # Check if the machine is back. We check regularely to
226                # ensure the machine was suspended long enough.
227                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
228                    return
229                else:
230                    if time_counter > timeout - 10:
231                        time_slice = 5
232                    time.sleep(time_slice)
233                    time_counter += time_slice
234
235            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
236                raise error.AutoservSuspendError(
237                    "DUT is not responding after %d seconds" % (time_counter))
238
239        start_time = time.time()
240        self.log_op(self.OP_SUSPEND, suspend)
241        lasted = time.time() - start_time
242        logging.info("Device resumed after %d secs", lasted)
243        if (lasted < timeout and not allow_early_resume):
244            raise error.AutoservSuspendError(
245                "Suspend did not last long enough: %d instead of %d" % (
246                    lasted, timeout))
247
248    def reboot_followup(self, *args, **dargs):
249        # pylint: disable=missing-docstring
250        super(RemoteHost, self).reboot_followup(*args, **dargs)
251        if self.job:
252            self.job.profilers.handle_reboot(self)
253
254
255    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
256        """
257        Wait for the host to come back from a reboot. This wraps the
258        generic wait_for_restart implementation in a reboot group.
259        """
260        def op_func():
261            # pylint: disable=missing-docstring
262            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
263        self.log_op(self.OP_REBOOT, op_func)
264
265
266    def cleanup(self):
267        # pylint: disable=missing-docstring
268        super(RemoteHost, self).cleanup()
269        self.reboot()
270
271
272    def get_tmp_dir(self, parent='/tmp'):
273        """
274        Return the pathname of a directory on the host suitable
275        for temporary file storage.
276
277        The directory and its content will be deleted automatically
278        on the destruction of the Host object that was used to obtain
279        it.
280        """
281        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
282        parent = os.path.dirname(template)
283        dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip()
284        self.tmp_dirs.append(dir_name)
285        return dir_name
286
287
288    def get_platform_label(self):
289        """
290        Return the platform label, or None if platform label is not set.
291        """
292
293        if self.job:
294            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
295                                       self.hostname)
296            keyvals = utils.read_keyval(keyval_path)
297            return keyvals.get('platform', None)
298        else:
299            return None
300
301
302    def get_all_labels(self):
303        """
304        Return all labels, or empty list if label is not set.
305        """
306        if self.job:
307            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
308                                       self.hostname)
309            keyvals = utils.read_keyval(keyval_path)
310            all_labels = keyvals.get('labels', '')
311            if all_labels:
312                all_labels = all_labels.split(',')
313                return [urllib.parse.unquote(label) for label in all_labels]
314        return []
315
316
317    def delete_tmp_dir(self, tmpdir):
318        """
319        Delete the given temporary directory on the remote machine.
320
321        @param tmpdir The directory to delete.
322        """
323        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
324        self.tmp_dirs.remove(tmpdir)
325
326
327    def delete_all_tmp_dirs(self, parent='/tmp'):
328        """
329        Delete all directories in parent that were created by get_tmp_dir
330
331        Note that this may involve deleting directories created by calls to
332        get_tmp_dir on a different RemoteHost instance than the one running this
333        method. Only perform this operation when certain that this will not
334        cause unexpected behavior.
335        """
336        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
337        if isinstance(parent, (list, tuple)):
338            parents = parent
339        else:
340            parents = [parent]
341        rm_paths = []
342        for parent in parents:
343            base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
344            # distinguish between non-wildcard asterisks in parent directory name
345            # and wildcards inserted from the template
346            base = '*'.join(
347                ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')])
348            path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:])
349            rm_paths.append(path)
350            # remove deleted directories from tmp_dirs
351            regex = os.path.join(parent, re.sub('(XXXX*)',
352                            lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
353                            self.TMP_DIR_TEMPLATE))
354            regex += '(/|$)' # remove if matches, or is within a dir that matches
355            self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)]
356
357        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)
358
359    def check_uptime(self):
360        """
361        Check that uptime is available and monotonically increasing.
362        """
363        if not self.is_up():
364            raise error.AutoservHostError('Client does not appear to be up')
365        result = self.run("/bin/cat /proc/uptime", 30)
366        return result.stdout.strip().split()[0]
367
368
369    def check_for_lkdtm(self):
370        """
371        Check for kernel dump test module. return True if exist.
372        """
373        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
374        return self.run(cmd, ignore_status=True).exit_status == 0
375
376
377    def are_wait_up_processes_up(self):
378        """
379        Checks if any HOSTS waitup processes are running yet on the
380        remote host.
381
382        Returns True if any the waitup processes are running, False
383        otherwise.
384        """
385        processes = self.get_wait_up_processes()
386        if len(processes) == 0:
387            return True # wait up processes aren't being used
388        for procname in processes:
389            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
390                                   ignore_status=True).exit_status
391            if exit_status == 0:
392                return True
393        return False
394
395
396    def get_labels(self):
397        """Return a list of labels for this given host.
398
399        This is the main way to retrieve all the automatic labels for a host
400        as it will run through all the currently implemented label functions.
401        """
402        labels = []
403        for label_function in self._LABEL_FUNCTIONS:
404            try:
405                label = label_function(self)
406            except Exception:
407                logging.exception('Label function %s failed; ignoring it.',
408                                  label_function.__name__)
409                label = None
410            if label:
411                if type(label) is str:
412                    labels.append(label)
413                elif type(label) is list:
414                    labels.extend(label)
415        return labels
416
417    def get_result_dir(self):
418        """Return the result directory path if passed or None if not.
419
420        @return string
421        """
422        if self.job and hasattr(self.job, 'resultdir'):
423            return self.job.resultdir
424        return None
425