xref: /aosp_15_r20/external/autotest/client/cros/power/power_suspend.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Liimport collections, logging, os, re, subprocess, time
7*9c5db199SXin Li
8*9c5db199SXin Lifrom autotest_lib.client.bin import utils
9*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
10*9c5db199SXin Lifrom autotest_lib.client.cros import cros_logging
11*9c5db199SXin Lifrom autotest_lib.client.cros.power import power_status
12*9c5db199SXin Lifrom autotest_lib.client.cros.power import power_utils
13*9c5db199SXin Lifrom autotest_lib.client.cros.power import sys_power
14*9c5db199SXin Li#pylint: disable=W0611
15*9c5db199SXin Lifrom autotest_lib.client.cros import flimflam_test_path
16*9c5db199SXin Liimport flimflam
17*9c5db199SXin Lifrom six.moves import range
18*9c5db199SXin Li
19*9c5db199SXin Li
20*9c5db199SXin Liclass Suspender(object):
21*9c5db199SXin Li    """Class for suspend/resume measurements.
22*9c5db199SXin Li
23*9c5db199SXin Li    Public attributes:
24*9c5db199SXin Li        disconnect_3G_time: Amount of seconds it took to disable 3G.
25*9c5db199SXin Li        successes[]: List of timing measurement dicts from successful suspends.
26*9c5db199SXin Li        failures[]: List of SuspendFailure exceptions from failed suspends.
27*9c5db199SXin Li        device_times[]: List of individual device suspend/resume time dicts.
28*9c5db199SXin Li
29*9c5db199SXin Li    Public methods:
30*9c5db199SXin Li        suspend: Do a suspend/resume cycle. Return timing measurement dict.
31*9c5db199SXin Li
32*9c5db199SXin Li    Private attributes:
33*9c5db199SXin Li        _logs: Array of /var/log/messages lines since start of suspend cycle.
34*9c5db199SXin Li        _log_file: Open file descriptor at the end of /var/log/messages.
35*9c5db199SXin Li        _logdir: Directory to store firmware logs in case of errors.
36*9c5db199SXin Li        _suspend: Set to the sys_power suspend function to use.
37*9c5db199SXin Li        _throw: Set to have SuspendFailure exceptions raised to the caller.
38*9c5db199SXin Li        _reset_pm_print_times: Set to deactivate pm_print_times after the test.
39*9c5db199SXin Li        _restart_tlsdated: Set to restart tlsdated after the test.
40*9c5db199SXin Li
41*9c5db199SXin Li    Private methods:
42*9c5db199SXin Li        __init__: Shuts off tlsdated for duration of test, disables 3G
43*9c5db199SXin Li        __del__: Restore tlsdated (must run eventually, but GC delay no problem)
44*9c5db199SXin Li        _set_pm_print_times: Enable/disable kernel device suspend timing output.
45*9c5db199SXin Li        _check_failure_log: Check /sys/.../suspend_stats for new failures.
46*9c5db199SXin Li        _ts: Returns a timestamp from /run/power_manager/last_resume_timings
47*9c5db199SXin Li        _hwclock_ts: Read RTC timestamp left on resume in hwclock-on-resume
48*9c5db199SXin Li        _device_resume_time: Read seconds overall device resume took from logs.
49*9c5db199SXin Li        _individual_device_times: Reads individual device suspend/resume times.
50*9c5db199SXin Li        _identify_driver: Return the driver name of a device (or "unknown").
51*9c5db199SXin Li    """
52*9c5db199SXin Li
53*9c5db199SXin Li    _DEFAULT_SUSPEND_DELAY = 15
54*9c5db199SXin Li    # Dictionary of board-specific "time to suspend" values determined
55*9c5db199SXin Li    # empirically where 'key' is <board> and 'value' is suspend delay in
56*9c5db199SXin Li    # seconds.
57*9c5db199SXin Li    _SUSPEND_DELAY = {}
58*9c5db199SXin Li
59*9c5db199SXin Li    # alarm/not_before value guaranteed to raise SpuriousWakeup in _hwclock_ts
60*9c5db199SXin Li    _ALARM_FORCE_EARLY_WAKEUP = 2147483647
61*9c5db199SXin Li
62*9c5db199SXin Li    # File written by send_metrics_on_resume containing timing information about
63*9c5db199SXin Li    # the last resume.
64*9c5db199SXin Li    _TIMINGS_FILE = '/run/power_manager/root/last_resume_timings'
65*9c5db199SXin Li
66*9c5db199SXin Li    # Latest powerd log files
67*9c5db199SXin Li    _POWERD_LOG_LATEST_PATH = '/var/log/power_manager/powerd.LATEST'
68*9c5db199SXin Li    _POWERD_LOG_PREVIOUS_PATH = '/var/log/power_manager/powerd.PREVIOUS'
69*9c5db199SXin Li
70*9c5db199SXin Li    # Line to look for suspend abort due to override.
71*9c5db199SXin Li    _ABORT_DUE_TO_OVERRIDE_LINE = 'Aborting suspend attempt for lockfile'
72*9c5db199SXin Li
73*9c5db199SXin Li    # Regexp to extract TS from powerd log line.
74*9c5db199SXin Li    _POWERD_TS_RE = '\[(\d{4}/\d{6}\.\d{6}):.*\]'
75*9c5db199SXin Li
76*9c5db199SXin Li    # Amount of lines to dump from the eventlog on a SpuriousWakeup. Should be
77*9c5db199SXin Li    # enough to include ACPI Wake Reason... 10 should be far on the safe side.
78*9c5db199SXin Li    _RELEVANT_EVENTLOG_LINES = 10
79*9c5db199SXin Li
80*9c5db199SXin Li    # Check value to catch overlong resume times (from missed RTC wakes)
81*9c5db199SXin Li    _MAX_RESUME_TIME = 10
82*9c5db199SXin Li
83*9c5db199SXin Li    # File written by powerd_suspend containing the hwclock time at resume.
84*9c5db199SXin Li    HWCLOCK_FILE = '/run/power_manager/root/hwclock-on-resume'
85*9c5db199SXin Li
86*9c5db199SXin Li    # File read by powerd to decide on the state to suspend (mem or freeze).
87*9c5db199SXin Li    _SUSPEND_STATE_PREF_FILE = 'suspend_to_idle'
88*9c5db199SXin Li
89*9c5db199SXin Li    # (b/79442787) Ignore these uarch for s0ix residency counter checking
90*9c5db199SXin Li    _IGNORE_S0IX_RESIDENCY_CHECK = ['Skylake']
91*9c5db199SXin Li
92*9c5db199SXin Li    def __init__(self, logdir, method=sys_power.do_suspend,
93*9c5db199SXin Li                 throw=False, device_times=False, suspend_state=''):
94*9c5db199SXin Li        """
95*9c5db199SXin Li        Prepare environment for suspending.
96*9c5db199SXin Li        @param suspend_state: Suspend state to enter into. It can be
97*9c5db199SXin Li                              'mem' or 'freeze' or an empty string. If
98*9c5db199SXin Li                              the suspend state is an empty string,
99*9c5db199SXin Li                              system suspends to the default pref.
100*9c5db199SXin Li        """
101*9c5db199SXin Li        self.disconnect_3G_time = 0
102*9c5db199SXin Li        self.successes = []
103*9c5db199SXin Li        self.failures = []
104*9c5db199SXin Li        self._logdir = logdir
105*9c5db199SXin Li        self._suspend = method
106*9c5db199SXin Li        self._throw = throw
107*9c5db199SXin Li        self._reset_pm_print_times = False
108*9c5db199SXin Li        self._restart_tlsdated = False
109*9c5db199SXin Li        self._log_file = None
110*9c5db199SXin Li        self._powerd_cycle_start_ts = None
111*9c5db199SXin Li        self._suspend_state = suspend_state
112*9c5db199SXin Li        if device_times:
113*9c5db199SXin Li            self.device_times = []
114*9c5db199SXin Li
115*9c5db199SXin Li        # stop tlsdated, make sure we/hwclock have /dev/rtc for ourselves
116*9c5db199SXin Li        if utils.system_output('initctl status tlsdated').find('start') != -1:
117*9c5db199SXin Li            utils.system('initctl stop tlsdated')
118*9c5db199SXin Li            self._restart_tlsdated = True
119*9c5db199SXin Li            # give process's file descriptors time to asynchronously tear down
120*9c5db199SXin Li            time.sleep(0.1)
121*9c5db199SXin Li
122*9c5db199SXin Li        # prime powerd_suspend RTC timestamp saving and make sure hwclock works
123*9c5db199SXin Li        utils.open_write_close(self.HWCLOCK_FILE, '')
124*9c5db199SXin Li        hwclock_output = utils.system_output('hwclock -r --verbose --utc',
125*9c5db199SXin Li                                             ignore_status=True)
126*9c5db199SXin Li        if not re.search('Using.*rtc interface to.*clock', hwclock_output):
127*9c5db199SXin Li            raise error.TestError('hwclock cannot find rtc: ' + hwclock_output)
128*9c5db199SXin Li
129*9c5db199SXin Li        # activate device suspend timing debug output
130*9c5db199SXin Li        if hasattr(self, 'device_times'):
131*9c5db199SXin Li            if not int(utils.read_one_line('/sys/power/pm_print_times')):
132*9c5db199SXin Li                self._set_pm_print_times(True)
133*9c5db199SXin Li                self._reset_pm_print_times = True
134*9c5db199SXin Li
135*9c5db199SXin Li        # Shut down 3G to remove its variability from suspend time measurements
136*9c5db199SXin Li        flim = flimflam.FlimFlam()
137*9c5db199SXin Li        service = flim.FindCellularService(0)
138*9c5db199SXin Li        if service:
139*9c5db199SXin Li            logging.info('Found 3G interface, disconnecting.')
140*9c5db199SXin Li            start_time = time.time()
141*9c5db199SXin Li            (success, status) = flim.DisconnectService(
142*9c5db199SXin Li                    service=service, wait_timeout=60)
143*9c5db199SXin Li            if success:
144*9c5db199SXin Li                logging.info('3G disconnected successfully.')
145*9c5db199SXin Li                self.disconnect_3G_time = time.time() - start_time
146*9c5db199SXin Li            else:
147*9c5db199SXin Li                logging.error('Could not disconnect: %s.', status)
148*9c5db199SXin Li                self.disconnect_3G_time = -1
149*9c5db199SXin Li
150*9c5db199SXin Li        self._configure_suspend_state()
151*9c5db199SXin Li
152*9c5db199SXin Li
153*9c5db199SXin Li    def _configure_suspend_state(self):
154*9c5db199SXin Li        """Configure the suspend state as requested."""
155*9c5db199SXin Li        if self._suspend_state:
156*9c5db199SXin Li            available_suspend_states = utils.read_one_line('/sys/power/state')
157*9c5db199SXin Li            if self._suspend_state not in available_suspend_states:
158*9c5db199SXin Li                raise error.TestNAError('Invalid suspend state: ' +
159*9c5db199SXin Li                                        self._suspend_state)
160*9c5db199SXin Li            # Check the current state. If it is same as the one requested,
161*9c5db199SXin Li            # we don't want to call PowerPrefChanger(restarts powerd).
162*9c5db199SXin Li            if self._suspend_state == power_utils.get_sleep_state():
163*9c5db199SXin Li                return
164*9c5db199SXin Li            should_freeze = '1' if self._suspend_state == 'freeze' else '0'
165*9c5db199SXin Li            new_prefs = {self._SUSPEND_STATE_PREF_FILE: should_freeze}
166*9c5db199SXin Li            self._power_pref_changer = power_utils.PowerPrefChanger(new_prefs)
167*9c5db199SXin Li
168*9c5db199SXin Li
169*9c5db199SXin Li    def _set_pm_print_times(self, on):
170*9c5db199SXin Li        """Enable/disable extra suspend timing output from powerd to syslog."""
171*9c5db199SXin Li        if utils.system('echo %s > /sys/power/pm_print_times' % int(bool(on)),
172*9c5db199SXin Li                ignore_status=True):
173*9c5db199SXin Li            logging.warning('Failed to set pm_print_times to %s', bool(on))
174*9c5db199SXin Li            del self.device_times
175*9c5db199SXin Li            self._reset_pm_print_times = False
176*9c5db199SXin Li        else:
177*9c5db199SXin Li            logging.info('Device resume times set to %s', bool(on))
178*9c5db199SXin Li
179*9c5db199SXin Li
180*9c5db199SXin Li    def _get_board(self):
181*9c5db199SXin Li        """Remove _freon from get_board if found."""
182*9c5db199SXin Li        return (utils.get_board().replace("_freon", ""))
183*9c5db199SXin Li
184*9c5db199SXin Li
185*9c5db199SXin Li    def _retrive_last_matching_line_ts(self, filename, pattern):
186*9c5db199SXin Li        """
187*9c5db199SXin Li        Returns timestamp of last matching line or None
188*9c5db199SXin Li        """
189*9c5db199SXin Li        try:
190*9c5db199SXin Li            with open(filename) as f:
191*9c5db199SXin Li                lines = f.readlines()
192*9c5db199SXin Li        except IOError:
193*9c5db199SXin Li            logging.info('Cannot open %s to retrieve the latest ts.', filename)
194*9c5db199SXin Li        else:
195*9c5db199SXin Li            for line in reversed(lines):
196*9c5db199SXin Li                if re.search(pattern, line):
197*9c5db199SXin Li                    matches = re.search(self._POWERD_TS_RE, line)
198*9c5db199SXin Li                    if matches:
199*9c5db199SXin Li                        return matches.group(1)
200*9c5db199SXin Li        return None
201*9c5db199SXin Li
202*9c5db199SXin Li
203*9c5db199SXin Li    def _aborted_due_to_locking(self):
204*9c5db199SXin Li        """
205*9c5db199SXin Li        Returns true if we found evidences in the powerd log that the suspend
206*9c5db199SXin Li        was aborted due to power_management override lock.
207*9c5db199SXin Li        """
208*9c5db199SXin Li        latest_ts = self._retrive_last_matching_line_ts(
209*9c5db199SXin Li            self._POWERD_LOG_LATEST_PATH, self._ABORT_DUE_TO_OVERRIDE_LINE)
210*9c5db199SXin Li        if latest_ts is None:
211*9c5db199SXin Li            latest_ts = self._retrive_last_matching_line_ts(
212*9c5db199SXin Li                self._POWERD_LOG_PREVIOUS_PATH, self._ABORT_DUE_TO_OVERRIDE_LINE)
213*9c5db199SXin Li
214*9c5db199SXin Li        if latest_ts is None:
215*9c5db199SXin Li            return False
216*9c5db199SXin Li
217*9c5db199SXin Li        if latest_ts > self._powerd_cycle_start_ts:
218*9c5db199SXin Li            return True
219*9c5db199SXin Li
220*9c5db199SXin Li        return False
221*9c5db199SXin Li
222*9c5db199SXin Li
223*9c5db199SXin Li    def _reset_logs(self):
224*9c5db199SXin Li        """Throw away cached log lines and reset log pointer to current end."""
225*9c5db199SXin Li        if self._log_file:
226*9c5db199SXin Li            self._log_file.close()
227*9c5db199SXin Li        self._log_file = open('/var/log/messages',
228*9c5db199SXin Li                              mode='r+',
229*9c5db199SXin Li                              **power_utils.encoding_kwargs())
230*9c5db199SXin Li        self._log_file.seek(0, os.SEEK_END)
231*9c5db199SXin Li        self._logs = []
232*9c5db199SXin Li
233*9c5db199SXin Li        self._powerd_cycle_start_ts = (
234*9c5db199SXin Li            self._retrive_last_matching_line_ts(self._POWERD_LOG_LATEST_PATH,
235*9c5db199SXin Li                                                self._POWERD_TS_RE))
236*9c5db199SXin Li
237*9c5db199SXin Li
238*9c5db199SXin Li    def _check_resume_finished(self, retries=11):
239*9c5db199SXin Li        """
240*9c5db199SXin Li        Read all lines logged since last reset into log cache. Block until last
241*9c5db199SXin Li        powerd_suspend resume message was read. Returns true if succeeded or
242*9c5db199SXin Li        false if took too long
243*9c5db199SXin Li        """
244*9c5db199SXin Li        finished_regex = re.compile(r'powerd_suspend\[\d+\]: Resume finished')
245*9c5db199SXin Li        for retry in range(retries + 1):
246*9c5db199SXin Li            lines = self._log_file.readlines()
247*9c5db199SXin Li            if lines:
248*9c5db199SXin Li                if self._logs and self._logs[-1][-1] != '\n':
249*9c5db199SXin Li                    # Reassemble line that was cut in the middle
250*9c5db199SXin Li                    self._logs[-1] += lines.pop(0)
251*9c5db199SXin Li                self._logs += lines
252*9c5db199SXin Li            for line in reversed(self._logs):
253*9c5db199SXin Li                if (finished_regex.search(line)):
254*9c5db199SXin Li                    return True
255*9c5db199SXin Li            time.sleep(0.005 * 2**retry)
256*9c5db199SXin Li
257*9c5db199SXin Li        return False
258*9c5db199SXin Li
259*9c5db199SXin Li
260*9c5db199SXin Li    def _ts(self, name, retries=11):
261*9c5db199SXin Li        """Searches logs for last timestamp with a given suspend message."""
262*9c5db199SXin Li        # Occasionally need to retry due to races from process wakeup order
263*9c5db199SXin Li        for retry in range(retries + 1):
264*9c5db199SXin Li            try:
265*9c5db199SXin Li                f = open(self._TIMINGS_FILE)
266*9c5db199SXin Li                for line in f:
267*9c5db199SXin Li                    words = line.split('=')
268*9c5db199SXin Li                    if name == words[0]:
269*9c5db199SXin Li                        try:
270*9c5db199SXin Li                            timestamp = float(words[1])
271*9c5db199SXin Li                        except ValueError:
272*9c5db199SXin Li                            logging.warning('Invalid timestamp: %s', line)
273*9c5db199SXin Li                            timestamp = 0
274*9c5db199SXin Li                        return timestamp
275*9c5db199SXin Li            except IOError:
276*9c5db199SXin Li                pass
277*9c5db199SXin Li            time.sleep(0.005 * 2**retry)
278*9c5db199SXin Li
279*9c5db199SXin Li        raise error.TestError('Could not find %s entry.' % name)
280*9c5db199SXin Li
281*9c5db199SXin Li
282*9c5db199SXin Li    def _hwclock_ts(self, not_before):
283*9c5db199SXin Li        """Read the RTC resume timestamp saved by powerd_suspend."""
284*9c5db199SXin Li        early_wakeup = False
285*9c5db199SXin Li        if os.path.exists(self.HWCLOCK_FILE):
286*9c5db199SXin Li            # TODO(crbug.com/733773): Still fragile see bug.
287*9c5db199SXin Li            match = re.search(r'(.+)(\.\d+)[+-]\d+:?\d+$',
288*9c5db199SXin Li                              utils.read_file(self.HWCLOCK_FILE), re.DOTALL)
289*9c5db199SXin Li            if match:
290*9c5db199SXin Li                timeval = time.strptime(match.group(1), "%Y-%m-%d %H:%M:%S")
291*9c5db199SXin Li                seconds = time.mktime(timeval)
292*9c5db199SXin Li                seconds += float(match.group(2))
293*9c5db199SXin Li                logging.debug('RTC resume timestamp read: %f', seconds)
294*9c5db199SXin Li                if not_before is None or seconds >= not_before:
295*9c5db199SXin Li                    return seconds
296*9c5db199SXin Li                early_wakeup = True
297*9c5db199SXin Li        if early_wakeup:
298*9c5db199SXin Li            logging.debug('Early wakeup, dumping eventlog if it exists:\n')
299*9c5db199SXin Li            elog = utils.system_output('elogtool list | tail -n %d' %
300*9c5db199SXin Li                                       self._RELEVANT_EVENTLOG_LINES,
301*9c5db199SXin Li                                       ignore_status=True)
302*9c5db199SXin Li            wake_elog = (['unknown'] + re.findall(r'Wake Source.*', elog))[-1]
303*9c5db199SXin Li            for line in reversed(self._logs):
304*9c5db199SXin Li                match = re.search(r'PM1_STS: WAK.*', line)
305*9c5db199SXin Li                if match:
306*9c5db199SXin Li                    wake_syslog = match.group(0)
307*9c5db199SXin Li                    break
308*9c5db199SXin Li            else:
309*9c5db199SXin Li                wake_syslog = 'unknown'
310*9c5db199SXin Li            for b, e, s in sys_power.SpuriousWakeupError.S3_ALLOWLIST:
311*9c5db199SXin Li                if (re.search(b, utils.get_board()) and
312*9c5db199SXin Li                        re.search(e, wake_elog) and re.search(s, wake_syslog)):
313*9c5db199SXin Li                    logging.warning('Allowlisted spurious wake in S3: %s | %s',
314*9c5db199SXin Li                                    wake_elog, wake_syslog)
315*9c5db199SXin Li                    return None
316*9c5db199SXin Li            raise sys_power.SpuriousWakeupError('Spurious wake in S3: %s | %s'
317*9c5db199SXin Li                    % (wake_elog, wake_syslog))
318*9c5db199SXin Li        raise error.TestError('Broken RTC timestamp: ' +
319*9c5db199SXin Li                              utils.read_file(self.HWCLOCK_FILE))
320*9c5db199SXin Li
321*9c5db199SXin Li
322*9c5db199SXin Li    def _firmware_resume_time(self):
323*9c5db199SXin Li        """Calculate seconds for firmware resume from logged TSC. (x86 only)"""
324*9c5db199SXin Li        if utils.get_arch() not in ['i686', 'x86_64']:
325*9c5db199SXin Li            # TODO: support this on ARM somehow
326*9c5db199SXin Li            return 0
327*9c5db199SXin Li        regex_freeze = re.compile(r'PM: resume from suspend-to-idle')
328*9c5db199SXin Li        regex_tsc = re.compile(r'TSC at resume: (\d+)$')
329*9c5db199SXin Li        freq = 1000 * int(utils.read_one_line(
330*9c5db199SXin Li                '/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq'))
331*9c5db199SXin Li        for line in reversed(self._logs):
332*9c5db199SXin Li            match_freeze = regex_freeze.search(line)
333*9c5db199SXin Li            if match_freeze:
334*9c5db199SXin Li                logging.info('fw resume time zero due to suspend-to-idle\n')
335*9c5db199SXin Li                return 0
336*9c5db199SXin Li            match_tsc = regex_tsc.search(line)
337*9c5db199SXin Li            if match_tsc:
338*9c5db199SXin Li                return float(match_tsc.group(1)) / freq
339*9c5db199SXin Li
340*9c5db199SXin Li        raise error.TestError('Failed to find TSC resume value in syslog.')
341*9c5db199SXin Li
342*9c5db199SXin Li
343*9c5db199SXin Li    def _device_resume_time(self):
344*9c5db199SXin Li        """Read amount of seconds for overall device resume from syslog."""
345*9c5db199SXin Li        regex = re.compile(r'PM: resume of devices complete after ([0-9.]+)')
346*9c5db199SXin Li        for line in reversed(self._logs):
347*9c5db199SXin Li            match = regex.search(line)
348*9c5db199SXin Li            if match:
349*9c5db199SXin Li                return float(match.group(1)) / 1000
350*9c5db199SXin Li
351*9c5db199SXin Li        raise error.TestError('Failed to find device resume time in syslog.')
352*9c5db199SXin Li
353*9c5db199SXin Li
354*9c5db199SXin Li    def _get_phase_times(self):
355*9c5db199SXin Li        phase_times = []
356*9c5db199SXin Li        regex = re.compile(r'PM: (\w+ )?(resume|suspend) of devices complete')
357*9c5db199SXin Li        for line in self._logs:
358*9c5db199SXin Li            match = regex.search(line)
359*9c5db199SXin Li            if match:
360*9c5db199SXin Li                ts = cros_logging.extract_kernel_timestamp(line)
361*9c5db199SXin Li                phase = match.group(1)
362*9c5db199SXin Li                if not phase:
363*9c5db199SXin Li                    phase = 'REG'
364*9c5db199SXin Li                phase_times.append((phase.upper(), ts))
365*9c5db199SXin Li        return sorted(phase_times, key = lambda entry: entry[1])
366*9c5db199SXin Li
367*9c5db199SXin Li
368*9c5db199SXin Li    def _get_phase(self, ts, phase_table, dev):
369*9c5db199SXin Li        for entry in phase_table:
370*9c5db199SXin Li            #checking if timestamp was before that phase's cutoff
371*9c5db199SXin Li            if ts < entry[1]:
372*9c5db199SXin Li                return entry[0]
373*9c5db199SXin Li        raise error.TestError('Device %s has a timestamp after all devices %s',
374*9c5db199SXin Li                              dev, 'had already resumed')
375*9c5db199SXin Li
376*9c5db199SXin Li
377*9c5db199SXin Li    def _individual_device_times(self, start_resume):
378*9c5db199SXin Li        """Return dict of individual device suspend and resume times."""
379*9c5db199SXin Li        self.device_times.append(dict())
380*9c5db199SXin Li        dev_details = collections.defaultdict(dict)
381*9c5db199SXin Li        regex = re.compile(r'call ([^ ]+)\+ returned 0 after ([0-9]+) usecs')
382*9c5db199SXin Li        phase_table = self._get_phase_times()
383*9c5db199SXin Li        for line in self._logs:
384*9c5db199SXin Li            match = regex.search(line)
385*9c5db199SXin Li            if match:
386*9c5db199SXin Li                device = match.group(1).replace(':', '-')
387*9c5db199SXin Li                key = 'seconds_dev_' + device
388*9c5db199SXin Li                secs = float(match.group(2)) / 1e6
389*9c5db199SXin Li                ts = cros_logging.extract_kernel_timestamp(line)
390*9c5db199SXin Li                if ts > start_resume:
391*9c5db199SXin Li                    key += '_resume'
392*9c5db199SXin Li                else:
393*9c5db199SXin Li                    key += '_suspend'
394*9c5db199SXin Li                #looking if we're in a special phase
395*9c5db199SXin Li                phase = self._get_phase(ts, phase_table, device)
396*9c5db199SXin Li                dev = dev_details[key]
397*9c5db199SXin Li                if phase in dev:
398*9c5db199SXin Li                    logging.warning('Duplicate %s entry for device %s, +%f',
399*9c5db199SXin Li                                    phase, device, secs)
400*9c5db199SXin Li                    dev[phase] += secs
401*9c5db199SXin Li                else:
402*9c5db199SXin Li                    dev[phase] = secs
403*9c5db199SXin Li
404*9c5db199SXin Li        for dev_key, dev in dev_details.items():
405*9c5db199SXin Li            total_secs = sum(dev.values())
406*9c5db199SXin Li            self.device_times[-1][dev_key] = total_secs
407*9c5db199SXin Li            report = '%s: %f TOT' % (dev_key, total_secs)
408*9c5db199SXin Li            for phase in dev.keys():
409*9c5db199SXin Li                if phase is 'REG':
410*9c5db199SXin Li                    continue
411*9c5db199SXin Li                report += ', %f %s' % (dev[phase], phase)
412*9c5db199SXin Li            logging.debug(report)
413*9c5db199SXin Li
414*9c5db199SXin Li
415*9c5db199SXin Li    def _identify_driver(self, device):
416*9c5db199SXin Li        """Return the driver name of a device (or "unknown")."""
417*9c5db199SXin Li        for path, subdirs, _ in os.walk('/sys/devices'):
418*9c5db199SXin Li            if device in subdirs:
419*9c5db199SXin Li                node = os.path.join(path, device, 'driver')
420*9c5db199SXin Li                if not os.path.exists(node):
421*9c5db199SXin Li                    return "unknown"
422*9c5db199SXin Li                return os.path.basename(os.path.realpath(node))
423*9c5db199SXin Li        else:
424*9c5db199SXin Li            return "unknown"
425*9c5db199SXin Li
426*9c5db199SXin Li
427*9c5db199SXin Li    def _check_for_errors(self, ignore_kernel_warns):
428*9c5db199SXin Li        """Find and identify suspend errors.
429*9c5db199SXin Li
430*9c5db199SXin Li        @param ignore_kernel_warns: Ignore kernel errors.
431*9c5db199SXin Li
432*9c5db199SXin Li        @returns: True iff we should retry.
433*9c5db199SXin Li
434*9c5db199SXin Li        @raises:
435*9c5db199SXin Li          sys_power.KernelError: for non-allowlisted kernel failures.
436*9c5db199SXin Li          sys_power.SuspendTimeout: took too long to enter suspend.
437*9c5db199SXin Li          sys_power.SpuriousWakeupError: woke too soon from suspend.
438*9c5db199SXin Li          sys_power.SuspendFailure: unidentified failure.
439*9c5db199SXin Li        """
440*9c5db199SXin Li        warning_regex = re.compile(r' kernel: \[.*WARNING:')
441*9c5db199SXin Li        abort_regex = re.compile(r' kernel: \[.*Freezing of tasks abort'
442*9c5db199SXin Li                r'| powerd_suspend\[.*Cancel suspend at kernel'
443*9c5db199SXin Li                r'| powerd_suspend\[.*Warning: Device or resource busy on ' \
444*9c5db199SXin Li                 'write to /sys/power/state')
445*9c5db199SXin Li        # rsyslogd can put this out of order with dmesg, so track in variable
446*9c5db199SXin Li        fail_regex = re.compile(r'powerd_suspend\[\d+\]: Error')
447*9c5db199SXin Li        failed = False
448*9c5db199SXin Li
449*9c5db199SXin Li        # TODO(scottz): warning_monitor crosbug.com/38092
450*9c5db199SXin Li        log_len = len(self._logs)
451*9c5db199SXin Li        for i in range(log_len):
452*9c5db199SXin Li            line = self._logs[i]
453*9c5db199SXin Li            if warning_regex.search(line):
454*9c5db199SXin Li                # match the source file from the WARNING line, and the
455*9c5db199SXin Li                # actual error text by peeking one or two lines below that
456*9c5db199SXin Li                src = cros_logging.strip_timestamp(line)
457*9c5db199SXin Li                text = ''
458*9c5db199SXin Li                if i+1 < log_len:
459*9c5db199SXin Li                    text = cros_logging.strip_timestamp(self._logs[i + 1])
460*9c5db199SXin Li                if i+2 < log_len:
461*9c5db199SXin Li                    text += '\n' + cros_logging.strip_timestamp(
462*9c5db199SXin Li                        self._logs[i + 2])
463*9c5db199SXin Li                for p1, p2 in sys_power.KernelError.ALLOWLIST:
464*9c5db199SXin Li                    if re.search(p1, src) and re.search(p2, text):
465*9c5db199SXin Li                        logging.info('Allowlisted KernelError: %s', src)
466*9c5db199SXin Li                        break
467*9c5db199SXin Li                else:
468*9c5db199SXin Li                    if ignore_kernel_warns:
469*9c5db199SXin Li                        logging.warning('Non-allowlisted KernelError: %s', src)
470*9c5db199SXin Li                    else:
471*9c5db199SXin Li                        raise sys_power.KernelError("%s\n%s" % (src, text))
472*9c5db199SXin Li            if abort_regex.search(line):
473*9c5db199SXin Li                wake_source = 'unknown'
474*9c5db199SXin Li                match = re.search(r'last active wakeup source: (.*)$',
475*9c5db199SXin Li                        '\n'.join(self._logs[i-5:i+3]), re.MULTILINE)
476*9c5db199SXin Li                if match:
477*9c5db199SXin Li                    wake_source = match.group(1)
478*9c5db199SXin Li                driver = self._identify_driver(wake_source)
479*9c5db199SXin Li                for b, w in sys_power.SpuriousWakeupError.S0_ALLOWLIST:
480*9c5db199SXin Li                    if (re.search(b, utils.get_board()) and
481*9c5db199SXin Li                            re.search(w, wake_source)):
482*9c5db199SXin Li                        logging.warning('Allowlisted spurious wake before '
483*9c5db199SXin Li                                        'S3: %s | %s', wake_source, driver)
484*9c5db199SXin Li                        return True
485*9c5db199SXin Li                if "rtc" in driver:
486*9c5db199SXin Li                    raise sys_power.SuspendTimeout('System took too '
487*9c5db199SXin Li                                                   'long to suspend.')
488*9c5db199SXin Li                raise sys_power.SpuriousWakeupError('Spurious wake '
489*9c5db199SXin Li                        'before S3: %s | %s' % (wake_source, driver))
490*9c5db199SXin Li            if fail_regex.search(line):
491*9c5db199SXin Li                failed = True
492*9c5db199SXin Li
493*9c5db199SXin Li        if failed:
494*9c5db199SXin Li            raise sys_power.SuspendFailure('Unidentified problem.')
495*9c5db199SXin Li        return False
496*9c5db199SXin Li
497*9c5db199SXin Li
498*9c5db199SXin Li    def _arc_resume_ts(self, retries=3):
499*9c5db199SXin Li        """Parse arc logcat for arc resume timestamp.
500*9c5db199SXin Li
501*9c5db199SXin Li        @param retries: retry if the expected log cannot be found in logcat.
502*9c5db199SXin Li
503*9c5db199SXin Li        @returns: a float representing arc resume timestamp in  CPU seconds
504*9c5db199SXin Li                  starting from the last boot if arc logcat resume log is parsed
505*9c5db199SXin Li                  correctly; otherwise None.
506*9c5db199SXin Li
507*9c5db199SXin Li        """
508*9c5db199SXin Li        command = 'android-sh -c "logcat -v monotonic -t 300 *:silent' \
509*9c5db199SXin Li                  ' ArcPowerManagerService:D"'
510*9c5db199SXin Li        regex_resume = re.compile(r'^\s*(\d+\.\d+).*ArcPowerManagerService: '
511*9c5db199SXin Li                                  'Device finished resuming$')
512*9c5db199SXin Li        for retry in range(retries + 1):
513*9c5db199SXin Li            arc_logcat = utils.system_output(command, ignore_status=False)
514*9c5db199SXin Li            arc_logcat = arc_logcat.splitlines()
515*9c5db199SXin Li            for line in arc_logcat:
516*9c5db199SXin Li                logging.debug('arc_resume: %s', line)
517*9c5db199SXin Li                match_resume = regex_resume.search(line)
518*9c5db199SXin Li                # ARC logcat is cleared before suspend so the first ARC resume
519*9c5db199SXin Li                # timestamp is the one we are looking for.
520*9c5db199SXin Li                if match_resume:
521*9c5db199SXin Li                    return float(match_resume.group(1))
522*9c5db199SXin Li            time.sleep(0.005 * 2**retry)
523*9c5db199SXin Li        else:
524*9c5db199SXin Li            logging.error('ARC did not resume correctly.')
525*9c5db199SXin Li            return None
526*9c5db199SXin Li
527*9c5db199SXin Li
528*9c5db199SXin Li    def get_suspend_delay(self):
529*9c5db199SXin Li        return self._SUSPEND_DELAY.get(self._get_board(),
530*9c5db199SXin Li                                       self._DEFAULT_SUSPEND_DELAY)
531*9c5db199SXin Li
532*9c5db199SXin Li
533*9c5db199SXin Li    def suspend(self, duration=10, ignore_kernel_warns=False,
534*9c5db199SXin Li                measure_arc=False):
535*9c5db199SXin Li        """
536*9c5db199SXin Li        Do a single suspend for 'duration' seconds. Estimates the amount of time
537*9c5db199SXin Li        it takes to suspend for a board (see _SUSPEND_DELAY), so the actual RTC
538*9c5db199SXin Li        wakeup delay will be longer. Returns None on errors, or raises the
539*9c5db199SXin Li        exception when _throw is set. Returns a dict of general measurements,
540*9c5db199SXin Li        or a tuple (general_measurements, individual_device_times) when
541*9c5db199SXin Li        _device_times is set.
542*9c5db199SXin Li
543*9c5db199SXin Li        @param duration: time in seconds to do a suspend prior to waking.
544*9c5db199SXin Li        @param ignore_kernel_warns: Ignore kernel errors.  Defaults to false.
545*9c5db199SXin Li        """
546*9c5db199SXin Li
547*9c5db199SXin Li        if power_utils.get_sleep_state() == 'freeze':
548*9c5db199SXin Li            arch = utils.get_arch()
549*9c5db199SXin Li
550*9c5db199SXin Li            if arch == 'x86_64':
551*9c5db199SXin Li                self._s0ix_residency_stats = power_status.S0ixResidencyStats()
552*9c5db199SXin Li            elif arch == 'aarch64':
553*9c5db199SXin Li                self._s2idle_residency_stats = \
554*9c5db199SXin Li                    power_status.S2IdleResidencyStats()
555*9c5db199SXin Li
556*9c5db199SXin Li        try:
557*9c5db199SXin Li            iteration = len(self.failures) + len(self.successes) + 1
558*9c5db199SXin Li            # Retry suspend in case we hit a known (allowlisted) bug
559*9c5db199SXin Li            for _ in range(10):
560*9c5db199SXin Li                # Clear powerd_suspend RTC timestamp, to avoid stale results.
561*9c5db199SXin Li                utils.open_write_close(self.HWCLOCK_FILE, '')
562*9c5db199SXin Li                self._reset_logs()
563*9c5db199SXin Li                utils.system('sync')
564*9c5db199SXin Li                board_delay = self.get_suspend_delay()
565*9c5db199SXin Li                # Clear the ARC logcat to make parsing easier.
566*9c5db199SXin Li                if measure_arc:
567*9c5db199SXin Li                    command = 'android-sh -c "logcat -c"'
568*9c5db199SXin Li                    utils.system(command, ignore_status=False)
569*9c5db199SXin Li                try:
570*9c5db199SXin Li                    # Return value of suspend method can be None
571*9c5db199SXin Li                    alarm = self._suspend(duration + board_delay)
572*9c5db199SXin Li                except sys_power.SpuriousWakeupError:
573*9c5db199SXin Li                    # might be another error, we check for it ourselves below
574*9c5db199SXin Li                    alarm = self._ALARM_FORCE_EARLY_WAKEUP
575*9c5db199SXin Li
576*9c5db199SXin Li                log_data = subprocess.check_output(['cbmem',
577*9c5db199SXin Li                                                    '-1']).decode('utf-8')
578*9c5db199SXin Li
579*9c5db199SXin Li                for msg in re.findall(r'^.*ERROR.*$', log_data, re.M):
580*9c5db199SXin Li                    for board, pattern in sys_power.FirmwareError.ALLOWLIST:
581*9c5db199SXin Li                        if (re.search(board, utils.get_board())
582*9c5db199SXin Li                                    and re.search(pattern, msg)):
583*9c5db199SXin Li                            logging.info('Allowlisted FW error: %s', msg)
584*9c5db199SXin Li                            break
585*9c5db199SXin Li                    else:
586*9c5db199SXin Li                        firmware_log = os.path.join(self._logdir,
587*9c5db199SXin Li                                'firmware.log.' + str(iteration))
588*9c5db199SXin Li                        with open(firmware_log, 'w') as f:
589*9c5db199SXin Li                            f.write(log_data)
590*9c5db199SXin Li                            logging.info('Saved firmware log: %s',
591*9c5db199SXin Li                                         firmware_log)
592*9c5db199SXin Li
593*9c5db199SXin Li                        raise sys_power.FirmwareError(
594*9c5db199SXin Li                                    msg.strip('\r\n '))
595*9c5db199SXin Li
596*9c5db199SXin Li                if not self._check_resume_finished():
597*9c5db199SXin Li                    if not self._aborted_due_to_locking():
598*9c5db199SXin Li                        raise error.TestError("Confidence check failed: did not try to suspend.")
599*9c5db199SXin Li                    logging.warning('Aborted suspend due to power override, will retry\n')
600*9c5db199SXin Li                    continue
601*9c5db199SXin Li                if not self._check_for_errors(ignore_kernel_warns):
602*9c5db199SXin Li                    hwclock_ts = self._hwclock_ts(alarm)
603*9c5db199SXin Li                    if hwclock_ts:
604*9c5db199SXin Li                        break
605*9c5db199SXin Li
606*9c5db199SXin Li            else:
607*9c5db199SXin Li                raise error.TestWarn('Ten tries failed due to allowlisted bug')
608*9c5db199SXin Li
609*9c5db199SXin Li            # calculate general measurements
610*9c5db199SXin Li            start_resume = self._ts('start_resume_time')
611*9c5db199SXin Li            kernel_down = (self._ts('end_suspend_time') -
612*9c5db199SXin Li                           self._ts('start_suspend_time'))
613*9c5db199SXin Li            kernel_up = self._ts('end_resume_time') - start_resume
614*9c5db199SXin Li            devices_up = self._device_resume_time()
615*9c5db199SXin Li            firmware_up = self._firmware_resume_time()
616*9c5db199SXin Li            # If we do not know the time at which the alarm went off, we cannot
617*9c5db199SXin Li            # calculate the |total_up| and |board_up| time.
618*9c5db199SXin Li            if alarm:
619*9c5db199SXin Li                total_up = hwclock_ts - alarm
620*9c5db199SXin Li                board_up = total_up - kernel_up - firmware_up
621*9c5db199SXin Li            else:
622*9c5db199SXin Li                total_up = -1
623*9c5db199SXin Li                board_up = -1
624*9c5db199SXin Li            try:
625*9c5db199SXin Li                cpu_up = self._ts('cpu_ready_time', 0) - start_resume
626*9c5db199SXin Li            except error.TestError:
627*9c5db199SXin Li                # can be missing on non-SMP machines
628*9c5db199SXin Li                cpu_up = None
629*9c5db199SXin Li            if total_up > self._MAX_RESUME_TIME:
630*9c5db199SXin Li                raise error.TestError('Confidence check failed: missed RTC wakeup.')
631*9c5db199SXin Li
632*9c5db199SXin Li            logging.info('Success(%d): %g down, %g up, %g board, %g firmware, '
633*9c5db199SXin Li                         '%g kernel, %g cpu, %g devices',
634*9c5db199SXin Li                         iteration, kernel_down, total_up, board_up,
635*9c5db199SXin Li                         firmware_up, kernel_up, cpu_up, devices_up)
636*9c5db199SXin Li
637*9c5db199SXin Li            if hasattr(self, '_s0ix_residency_stats'):
638*9c5db199SXin Li                s0ix_residency_secs = \
639*9c5db199SXin Li                        self._s0ix_residency_stats.\
640*9c5db199SXin Li                                get_accumulated_residency_secs()
641*9c5db199SXin Li                cpu_uarch = utils.get_intel_cpu_uarch()
642*9c5db199SXin Li                if not s0ix_residency_secs:
643*9c5db199SXin Li                    msg = 'S0ix residency did not change.'
644*9c5db199SXin Li                    if cpu_uarch not in self._IGNORE_S0IX_RESIDENCY_CHECK:
645*9c5db199SXin Li                        raise sys_power.S0ixResidencyNotChanged(msg)
646*9c5db199SXin Li                    logging.warning(msg)
647*9c5db199SXin Li                logging.info('S0ix residency : %d secs.', s0ix_residency_secs)
648*9c5db199SXin Li            elif hasattr(self, '_s2idle_residency_stats'):
649*9c5db199SXin Li                s2idle_residency_usecs = \
650*9c5db199SXin Li                        self._s2idle_residency_stats.\
651*9c5db199SXin Li                                get_accumulated_residency_usecs()
652*9c5db199SXin Li                if not s2idle_residency_usecs:
653*9c5db199SXin Li                    msg = 's2idle residency did not change.'
654*9c5db199SXin Li                    raise sys_power.S2IdleResidencyNotChanged(msg)
655*9c5db199SXin Li
656*9c5db199SXin Li                logging.info('s2idle residency : %d usecs.',
657*9c5db199SXin Li                             s2idle_residency_usecs)
658*9c5db199SXin Li
659*9c5db199SXin Li            successful_suspend = {
660*9c5db199SXin Li                'seconds_system_suspend': kernel_down,
661*9c5db199SXin Li                'seconds_system_resume': total_up,
662*9c5db199SXin Li                'seconds_system_resume_firmware': firmware_up + board_up,
663*9c5db199SXin Li                'seconds_system_resume_firmware_cpu': firmware_up,
664*9c5db199SXin Li                'seconds_system_resume_firmware_ec': board_up,
665*9c5db199SXin Li                'seconds_system_resume_kernel': kernel_up,
666*9c5db199SXin Li                'seconds_system_resume_kernel_cpu': cpu_up,
667*9c5db199SXin Li                'seconds_system_resume_kernel_dev': devices_up,
668*9c5db199SXin Li                }
669*9c5db199SXin Li
670*9c5db199SXin Li            if measure_arc:
671*9c5db199SXin Li                arc_resume_ts = self._arc_resume_ts()
672*9c5db199SXin Li                if arc_resume_ts:
673*9c5db199SXin Li                    successful_suspend['seconds_system_resume_arc'] = \
674*9c5db199SXin Li                        arc_resume_ts - start_resume
675*9c5db199SXin Li
676*9c5db199SXin Li            self.successes.append(successful_suspend)
677*9c5db199SXin Li
678*9c5db199SXin Li            if hasattr(self, 'device_times'):
679*9c5db199SXin Li                self._individual_device_times(start_resume)
680*9c5db199SXin Li                return (self.successes[-1], self.device_times[-1])
681*9c5db199SXin Li            else:
682*9c5db199SXin Li                return self.successes[-1]
683*9c5db199SXin Li
684*9c5db199SXin Li        except sys_power.SuspendFailure as ex:
685*9c5db199SXin Li            message = '%s(%d): %s' % (type(ex).__name__, iteration, ex)
686*9c5db199SXin Li            logging.error(message)
687*9c5db199SXin Li            self.failures.append(ex)
688*9c5db199SXin Li            if self._throw:
689*9c5db199SXin Li                if type(ex).__name__ in ['KernelError', 'SuspendTimeout']:
690*9c5db199SXin Li                    raise error.TestWarn(message)
691*9c5db199SXin Li                else:
692*9c5db199SXin Li                    raise error.TestFail(message)
693*9c5db199SXin Li            return None
694*9c5db199SXin Li
695*9c5db199SXin Li
696*9c5db199SXin Li    def finalize(self):
697*9c5db199SXin Li        """Restore normal environment (not turning 3G back on for now...)"""
698*9c5db199SXin Li        if os.path.exists(self.HWCLOCK_FILE):
699*9c5db199SXin Li            os.remove(self.HWCLOCK_FILE)
700*9c5db199SXin Li            if self._restart_tlsdated:
701*9c5db199SXin Li                utils.system('initctl start tlsdated')
702*9c5db199SXin Li            if self._reset_pm_print_times:
703*9c5db199SXin Li                self._set_pm_print_times(False)
704*9c5db199SXin Li        if hasattr(self, '_power_pref_changer'):
705*9c5db199SXin Li            self._power_pref_changer.finalize()
706*9c5db199SXin Li
707*9c5db199SXin Li
708*9c5db199SXin Li    def __del__(self):
709*9c5db199SXin Li        self.finalize()
710