xref: /aosp_15_r20/external/autotest/client/cros/power/sys_power.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Provides utility methods for controlling powerd in ChromiumOS."""
7
8import errno
9import fcntl
10import logging
11import multiprocessing
12import os
13import time
14
15import common
16from autotest_lib.client.cros import rtc
17from autotest_lib.client.cros import upstart
18
19SYSFS_POWER_STATE = '/sys/power/state'
20SYSFS_WAKEUP_COUNT = '/sys/power/wakeup_count'
21
22PAUSE_ETHERNET_HOOK_FILE = '/run/autotest_pause_ethernet_hook'
23pause_ethernet_file = None
24
25
26class SuspendFailure(Exception):
27    """Base class for a failure during a single suspend/resume cycle."""
28    pass
29
30
31class SuspendTimeout(SuspendFailure):
32    """Suspend took too long, got wakeup event (RTC tick) before it was done."""
33    pass
34
35
36class KernelError(SuspendFailure):
37    """Kernel problem encountered during suspend/resume.
38
39    Allowlist is an array of a 2-entry tuples consisting of (source regexp, text
40    regexp).  For example, kernel output might look like,
41
42      [  597.079950] WARNING: CPU: 0 PID: 21415 at \
43      <path>/v3.18/drivers/gpu/drm/i915/intel_pm.c:3687 \
44      skl_update_other_pipe_wm+0x136/0x18a()
45      [  597.079962] WARN_ON(!wm_changed)
46
47    source regexp should match first line above while text regexp can match
48    up to 2 lines below the source.  Note timestamps are stripped prior to
49    comparing regexp.
50    """
51    ALLOWLIST = [
52        # crosbug.com/37594: debug tracing clock desync we don't care about
53        (r'kernel/trace/ring_buffer.c:\d+ rb_reserve_next_event',
54         r'Delta way too big!'),
55        # TODO(crosbug.com/p/52008): Remove from allowlist once watermark
56        # implementation has landed.
57        (r'v3.18/\S+/intel_pm.c:\d+ skl_update_other_pipe_wm',
58         r'WARN_ON\(\!wm_changed\)')
59    ]
60
61
62class FirmwareError(SuspendFailure):
63    """String 'ERROR' found in firmware log after resume."""
64    ALLOWLIST = [
65            # crosbug.com/36762: no one knows, but it has always been there
66            ('^stumpy', r'PNP: 002e\.4 70 irq size: 0x0000000001 not assigned'
67             ),
68            # crbug.com/221538: no one knows what ME errors mean anyway
69            ('^parrot', r'ME failed to respond'),
70            # b/64684441: eve SKU without eMMC
71            ('^eve', r'Card did not respond to voltage select!'),
72            # b/187561710#comment6: waive mcache error from volteer
73            ('^volteer', r'mcache overflow, should increase CBFS_MCACHE size!'
74             ),
75            # b/221113302: Guybrush do not save firmware log to cbmem during
76            #              resume, so all errors seen actually occurred during boot.
77            #              This workaround can be removed when b/221231786 is fixed.
78            ('^guybrush', r'.*'),
79            # b/195336611: waive the eMMC error for the non-emmc sku
80            ('^brya', r'No known Realtek reader found'),
81    ]
82
83
84class SpuriousWakeupError(SuspendFailure):
85    """Received spurious wakeup while suspending or woke before schedule."""
86    S3_ALLOWLIST = [  # (<board>, <eventlog wake source>, <syslog wake source>)
87        # crbug.com/220014: spurious trackpad IRQs
88        ('^link', 'Wake Source | GPIO | 12', ''),
89        # crbug.com/345327: unknown, probably same as crbug.com/290923
90        ('^x86-alex', '', ''),   # alex can report neither, blanket ignore
91        # crbug.com/355106: unknown, possibly related to crbug.com/290923
92        ('^lumpy|^parrot', '', 'PM1_STS: WAK PWRBTN'),
93    ]
94    S0_ALLOWLIST = [  # (<board>, <kernel wake source>)
95        # crbug.com/290923: spurious keyboard IRQ, believed to be from Servo
96        ('^x86-alex|^lumpy|^parrot|^butterfly', 'serio0'),
97        # crosbug.com/p/46140: battery event caused by MKBP
98        ('^elm|^oak|^hana', 'spi32766.0'),
99    ]
100
101
102class MemoryError(SuspendFailure):
103    """memory_suspend_test found memory corruption."""
104    pass
105
106
107class SuspendNotAllowed(SuspendFailure):
108    """Suspend was not allowed to be performed."""
109    pass
110
111
112class S0ixResidencyNotChanged(SuspendFailure):
113    """power_SuspendStress test found CPU/SoC is unable to idle properly
114    when suspended to S0ix. """
115    pass
116
117
118class S2IdleResidencyNotChanged(SuspendFailure):
119    """power_SuspendStress test found CPU/SoC is unable to idle properly
120    when suspended to s2idle. """
121    pass
122
123
124def prepare_wakeup(seconds):
125    """Prepare the device to wake up from an upcoming suspend.
126
127    @param seconds: The number of seconds to allow the device to suspend.
128    """
129    # May cause DUT not wake from sleep if the suspend time is 1 second.
130    # It happens when the current clock (floating point) is close to the
131    # next integer, as the RTC sysfs interface only accepts integers.
132    # Make sure it is larger than or equal to 2.
133    assert seconds >= 2
134    wakeup_count = read_wakeup_count()
135    estimated_alarm = int(rtc.get_seconds() + seconds)
136    logging.debug('Suspend for %d seconds, estimated wakealarm = %d',
137                  seconds, estimated_alarm)
138    return (estimated_alarm, wakeup_count)
139
140
141def check_wakeup(estimated_alarm):
142    """Verify that the device did not wakeup early.
143
144    @param estimated_alarm: The lower bound time at which the device was
145                            expected to wake up.
146    """
147    now = rtc.get_seconds()
148    if now < estimated_alarm:
149        logging.error('Woke up %d secs early', (estimated_alarm - now))
150        raise SpuriousWakeupError('Woke from suspend early')
151
152
153def pause_check_network_hook():
154    """Stop check_ethernet.hook from running.
155
156    Lock will be held until test exits, or resume_check_network_hook() is
157    called. check_ethernet.hook may also "break" the lock if it judges you've
158    held it too long.
159
160    Can be called multiple times, to refresh the lock.
161    """
162    # If this function is called multiple times, we intentionally re-assign
163    # this global, which closes out the old lock and grabs it anew.
164    # We intentionally "touch" the file to update its mtime, so we can judge
165    # how long locks are held.
166    global pause_ethernet_file
167    pause_ethernet_file = open(PAUSE_ETHERNET_HOOK_FILE, 'w+')
168    try:
169        # This is a blocking call unless an error occurs.
170        fcntl.flock(pause_ethernet_file, fcntl.LOCK_EX)
171    except IOError:
172        pass
173
174
175def resume_check_network_hook():
176    """Resume check_ethernet.hook.
177
178    Must call pause_check_network_hook() at least once before calling this.
179    """
180    global pause_ethernet_file
181    # Closing the file descriptor releases the lock.
182    pause_ethernet_file.close()
183    pause_ethernet_file = None
184
185
186def do_suspend(wakeup_timeout, delay_seconds=0):
187    """Suspend using the power manager with a wakeup timeout.
188
189    Wait for |delay_seconds|, suspend the system(S3/S0iX) using
190    powerd_dbus_suspend program. powerd_dbus_suspend first sets a wakeup alarm
191    on the dut for |current_time + wakeup_timeout|. Thus the device tries to
192    resume at |current_time + wakeup_timeout| irrespective of when it suspended.
193    This means that RTC can trigger an early resume and prevent suspend.
194
195    Function will block until suspend/resume has completed or failed.
196    Returns the wake alarm time from the RTC as epoch.
197
198    @param wakeup_timeout: time from now after which the device has to.
199    @param delay_seconds: Number of seconds wait before suspending the DUT.
200
201    """
202    pause_check_network_hook()
203    estimated_alarm, wakeup_count = prepare_wakeup(wakeup_timeout)
204    upstart.ensure_running('powerd')
205    command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 '
206               '--wakeup_count=%d --wakeup_timeout=%d' %
207               (delay_seconds, wakeup_count, wakeup_timeout))
208    logging.info("Running '%s'", command)
209    os.system(command)
210    check_wakeup(estimated_alarm)
211    return estimated_alarm
212
213
214def suspend_for(time_in_suspend, delay_seconds=0):
215    """Suspend using the power manager and spend |time_in_suspend| in suspend.
216
217    Wait for |delay_seconds|, suspend the system(S3/S0iX) using
218    powerd_dbus_suspend program. power manager sets a wakeup alarm on the dut
219    for |time_in_suspend| just before suspending
220    (writing power state to /sys/power/state). Thus the device spends most of
221    |time_in_suspend| in S0iX/S3.
222
223    Function will block until suspend/resume has completed or failed.
224    Returns the wake alarm time from the RTC as epoch.
225
226    @param time_in_suspend: Number of seconds to suspend the DUT.
227    @param delay_seconds: Number of seconds wait before suspending the DUT.
228    """
229    pause_check_network_hook()
230
231    wakeup_count = read_wakeup_count()
232    upstart.ensure_running('powerd')
233    command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 '
234               '--wakeup_count=%d --suspend_for_sec=%d' %
235               (delay_seconds, wakeup_count, time_in_suspend))
236    logging.info("Running '%s'", command)
237    os.system(command)
238
239
240def suspend_bg_for_dark_resume(suspend_seconds, delay_seconds=0):
241    """Do a non-blocking suspend using power manager.
242
243    Wait for |delay_seconds|, then suspend with an rtc alarm for
244    waking up again after having been suspended for |suspend_seconds|, using
245    the powerd_dbus_suspend program all in the background.
246
247    @param suspend_seconds: Number of seconds to suspend the DUT.
248    @param delay_seconds: Number of seconds wait before suspending the DUT.
249
250    """
251    upstart.ensure_running('powerd')
252    # Disarm any existing wake alarms so as to prevent early wakeups.
253    os.system('echo 0 > /sys/class/rtc/rtc0/wakealarm')
254    wakeup_count = read_wakeup_count()
255    command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 '
256               '--wakeup_count=%d --wakeup_timeout=%d '
257               '--disable_dark_resume=false' %
258               (delay_seconds, wakeup_count, suspend_seconds))
259    logging.info("Running '%s'", command)
260    process = multiprocessing.Process(target=os.system, args=(command,))
261    process.start()
262
263
264def kernel_suspend(seconds, state='mem'):
265    """Do a kernel suspend.
266
267    Suspend the system to @state, waking up again after |seconds|, by directly
268    writing to /sys/power/state. Function will block until suspend/resume has
269    completed or failed.
270
271    @param seconds: The number of seconds to suspend the device.
272    @param state: power state to suspend to.  DEFAULT mem.
273    """
274    alarm, wakeup_count = prepare_wakeup(seconds)
275    rtc.set_wake_alarm(alarm)
276    logging.debug('Saving wakeup count: %d', wakeup_count)
277    write_wakeup_count(wakeup_count)
278    try:
279        logging.info('Suspending at %d', rtc.get_seconds())
280        with open(SYSFS_POWER_STATE, 'w') as sysfs_file:
281            sysfs_file.write(state)
282    except IOError as e:
283        logging.exception('Writing to %s failed', SYSFS_POWER_STATE)
284        if e.errno == errno.EBUSY and rtc.get_seconds() >= alarm:
285            # The kernel returns EBUSY if it has to abort because
286            # another wakeup fires before we've reached suspend.
287            raise SpuriousWakeupError('Received spurious wakeup in kernel.')
288        else:
289            # Some driver probably failed to suspend properly.
290            # A hint as to what failed is in errno.
291            raise KernelError('Suspend failed: %s' % e.strerror)
292    else:
293        logging.info('Woke from suspend at %d', rtc.get_seconds())
294    logging.debug('New wakeup count: %d', read_wakeup_count())
295    check_wakeup(alarm)
296
297
298def idle_suspend(seconds):
299    """
300    Wait for the system to suspend to RAM (S3), scheduling the RTC to wake up
301    |seconds| after this function was called. Caller must ensure that the system
302    will idle-suspend in time for this to happen. Returns the wake alarm time
303    from the RTC as epoch.
304
305    @param seconds: The number of seconds before wakeup.
306    """
307    alarm, _ = prepare_wakeup(seconds)
308    rtc.set_wake_alarm(alarm)
309    while rtc.get_seconds() < alarm:
310        time.sleep(0.2)
311
312    # tell powerd something happened, or it will immediately idle-suspend again
313    # TODO: switch to cros.power_utils#call_powerd_dbus_method once this
314    # horrible mess with the WiFi tests and this file's imports is solved
315    logging.debug('Simulating user activity after idle suspend...')
316    os.system('dbus-send --type=method_call --system '
317              '--dest=org.chromium.PowerManager /org/chromium/PowerManager '
318              'org.chromium.PowerManager.HandleUserActivity')
319
320    return alarm
321
322
323def memory_suspend(seconds, size=0):
324    """Do a memory suspend.
325
326    Suspend the system to RAM (S3), waking up again after |seconds|, using
327    the memory_suspend_test tool. Function will block until suspend/resume has
328    completed or failed.
329
330    @param seconds: The number of seconds to suspend the device.
331    @param size: Amount of memory to allocate, in bytes.
332                 Set to 0 to let memory_suspend_test determine amount of memory.
333    """
334    # since we cannot have utils.system_output in here, we need a workaround
335    output = '/tmp/memory_suspend_output'
336    wakeup_count = read_wakeup_count()
337    status = os.system('/usr/bin/memory_suspend_test --wakeup_count=%d '
338                       '--size=%d > %s --suspend_for_sec=%d' %
339                       (wakeup_count, size, output, seconds))
340    status = os.WEXITSTATUS(status)
341    if status == 2:
342        logging.error('memory_suspend_test found the following errors:')
343        for line in open(output, 'r'):
344            logging.error(line)
345        raise MemoryError('Memory corruption found after resume')
346    elif status == 1:
347        raise SuspendFailure('Failure in powerd_suspend during memory test')
348    elif status:
349        raise SuspendFailure('Unknown failure in memory_suspend_test (crash?)')
350
351
352def read_wakeup_count():
353    """
354    Retrieves the current value of /sys/power/wakeup_count.
355    """
356    with open(SYSFS_WAKEUP_COUNT) as sysfs_file:
357        return int(sysfs_file.read().rstrip('\n'))
358
359
360def write_wakeup_count(wakeup_count):
361    """Writes a value to /sys/power/wakeup_count.
362
363    @param wakeup_count: The wakeup count value to write.
364    """
365    try:
366        with open(SYSFS_WAKEUP_COUNT, 'w') as sysfs_file:
367            sysfs_file.write(str(wakeup_count))
368    except IOError as e:
369        if (e.errno == errno.EINVAL and read_wakeup_count() != wakeup_count):
370            raise SpuriousWakeupError('wakeup_count changed before suspend')
371        else:
372            raise
373