1# Lint as: python2, python3 2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""Provides utility methods for controlling powerd in ChromiumOS.""" 7 8import errno 9import fcntl 10import logging 11import multiprocessing 12import os 13import time 14 15import common 16from autotest_lib.client.cros import rtc 17from autotest_lib.client.cros import upstart 18 19SYSFS_POWER_STATE = '/sys/power/state' 20SYSFS_WAKEUP_COUNT = '/sys/power/wakeup_count' 21 22PAUSE_ETHERNET_HOOK_FILE = '/run/autotest_pause_ethernet_hook' 23pause_ethernet_file = None 24 25 26class SuspendFailure(Exception): 27 """Base class for a failure during a single suspend/resume cycle.""" 28 pass 29 30 31class SuspendTimeout(SuspendFailure): 32 """Suspend took too long, got wakeup event (RTC tick) before it was done.""" 33 pass 34 35 36class KernelError(SuspendFailure): 37 """Kernel problem encountered during suspend/resume. 38 39 Allowlist is an array of a 2-entry tuples consisting of (source regexp, text 40 regexp). For example, kernel output might look like, 41 42 [ 597.079950] WARNING: CPU: 0 PID: 21415 at \ 43 <path>/v3.18/drivers/gpu/drm/i915/intel_pm.c:3687 \ 44 skl_update_other_pipe_wm+0x136/0x18a() 45 [ 597.079962] WARN_ON(!wm_changed) 46 47 source regexp should match first line above while text regexp can match 48 up to 2 lines below the source. Note timestamps are stripped prior to 49 comparing regexp. 50 """ 51 ALLOWLIST = [ 52 # crosbug.com/37594: debug tracing clock desync we don't care about 53 (r'kernel/trace/ring_buffer.c:\d+ rb_reserve_next_event', 54 r'Delta way too big!'), 55 # TODO(crosbug.com/p/52008): Remove from allowlist once watermark 56 # implementation has landed. 57 (r'v3.18/\S+/intel_pm.c:\d+ skl_update_other_pipe_wm', 58 r'WARN_ON\(\!wm_changed\)') 59 ] 60 61 62class FirmwareError(SuspendFailure): 63 """String 'ERROR' found in firmware log after resume.""" 64 ALLOWLIST = [ 65 # crosbug.com/36762: no one knows, but it has always been there 66 ('^stumpy', r'PNP: 002e\.4 70 irq size: 0x0000000001 not assigned' 67 ), 68 # crbug.com/221538: no one knows what ME errors mean anyway 69 ('^parrot', r'ME failed to respond'), 70 # b/64684441: eve SKU without eMMC 71 ('^eve', r'Card did not respond to voltage select!'), 72 # b/187561710#comment6: waive mcache error from volteer 73 ('^volteer', r'mcache overflow, should increase CBFS_MCACHE size!' 74 ), 75 # b/221113302: Guybrush do not save firmware log to cbmem during 76 # resume, so all errors seen actually occurred during boot. 77 # This workaround can be removed when b/221231786 is fixed. 78 ('^guybrush', r'.*'), 79 # b/195336611: waive the eMMC error for the non-emmc sku 80 ('^brya', r'No known Realtek reader found'), 81 ] 82 83 84class SpuriousWakeupError(SuspendFailure): 85 """Received spurious wakeup while suspending or woke before schedule.""" 86 S3_ALLOWLIST = [ # (<board>, <eventlog wake source>, <syslog wake source>) 87 # crbug.com/220014: spurious trackpad IRQs 88 ('^link', 'Wake Source | GPIO | 12', ''), 89 # crbug.com/345327: unknown, probably same as crbug.com/290923 90 ('^x86-alex', '', ''), # alex can report neither, blanket ignore 91 # crbug.com/355106: unknown, possibly related to crbug.com/290923 92 ('^lumpy|^parrot', '', 'PM1_STS: WAK PWRBTN'), 93 ] 94 S0_ALLOWLIST = [ # (<board>, <kernel wake source>) 95 # crbug.com/290923: spurious keyboard IRQ, believed to be from Servo 96 ('^x86-alex|^lumpy|^parrot|^butterfly', 'serio0'), 97 # crosbug.com/p/46140: battery event caused by MKBP 98 ('^elm|^oak|^hana', 'spi32766.0'), 99 ] 100 101 102class MemoryError(SuspendFailure): 103 """memory_suspend_test found memory corruption.""" 104 pass 105 106 107class SuspendNotAllowed(SuspendFailure): 108 """Suspend was not allowed to be performed.""" 109 pass 110 111 112class S0ixResidencyNotChanged(SuspendFailure): 113 """power_SuspendStress test found CPU/SoC is unable to idle properly 114 when suspended to S0ix. """ 115 pass 116 117 118class S2IdleResidencyNotChanged(SuspendFailure): 119 """power_SuspendStress test found CPU/SoC is unable to idle properly 120 when suspended to s2idle. """ 121 pass 122 123 124def prepare_wakeup(seconds): 125 """Prepare the device to wake up from an upcoming suspend. 126 127 @param seconds: The number of seconds to allow the device to suspend. 128 """ 129 # May cause DUT not wake from sleep if the suspend time is 1 second. 130 # It happens when the current clock (floating point) is close to the 131 # next integer, as the RTC sysfs interface only accepts integers. 132 # Make sure it is larger than or equal to 2. 133 assert seconds >= 2 134 wakeup_count = read_wakeup_count() 135 estimated_alarm = int(rtc.get_seconds() + seconds) 136 logging.debug('Suspend for %d seconds, estimated wakealarm = %d', 137 seconds, estimated_alarm) 138 return (estimated_alarm, wakeup_count) 139 140 141def check_wakeup(estimated_alarm): 142 """Verify that the device did not wakeup early. 143 144 @param estimated_alarm: The lower bound time at which the device was 145 expected to wake up. 146 """ 147 now = rtc.get_seconds() 148 if now < estimated_alarm: 149 logging.error('Woke up %d secs early', (estimated_alarm - now)) 150 raise SpuriousWakeupError('Woke from suspend early') 151 152 153def pause_check_network_hook(): 154 """Stop check_ethernet.hook from running. 155 156 Lock will be held until test exits, or resume_check_network_hook() is 157 called. check_ethernet.hook may also "break" the lock if it judges you've 158 held it too long. 159 160 Can be called multiple times, to refresh the lock. 161 """ 162 # If this function is called multiple times, we intentionally re-assign 163 # this global, which closes out the old lock and grabs it anew. 164 # We intentionally "touch" the file to update its mtime, so we can judge 165 # how long locks are held. 166 global pause_ethernet_file 167 pause_ethernet_file = open(PAUSE_ETHERNET_HOOK_FILE, 'w+') 168 try: 169 # This is a blocking call unless an error occurs. 170 fcntl.flock(pause_ethernet_file, fcntl.LOCK_EX) 171 except IOError: 172 pass 173 174 175def resume_check_network_hook(): 176 """Resume check_ethernet.hook. 177 178 Must call pause_check_network_hook() at least once before calling this. 179 """ 180 global pause_ethernet_file 181 # Closing the file descriptor releases the lock. 182 pause_ethernet_file.close() 183 pause_ethernet_file = None 184 185 186def do_suspend(wakeup_timeout, delay_seconds=0): 187 """Suspend using the power manager with a wakeup timeout. 188 189 Wait for |delay_seconds|, suspend the system(S3/S0iX) using 190 powerd_dbus_suspend program. powerd_dbus_suspend first sets a wakeup alarm 191 on the dut for |current_time + wakeup_timeout|. Thus the device tries to 192 resume at |current_time + wakeup_timeout| irrespective of when it suspended. 193 This means that RTC can trigger an early resume and prevent suspend. 194 195 Function will block until suspend/resume has completed or failed. 196 Returns the wake alarm time from the RTC as epoch. 197 198 @param wakeup_timeout: time from now after which the device has to. 199 @param delay_seconds: Number of seconds wait before suspending the DUT. 200 201 """ 202 pause_check_network_hook() 203 estimated_alarm, wakeup_count = prepare_wakeup(wakeup_timeout) 204 upstart.ensure_running('powerd') 205 command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 ' 206 '--wakeup_count=%d --wakeup_timeout=%d' % 207 (delay_seconds, wakeup_count, wakeup_timeout)) 208 logging.info("Running '%s'", command) 209 os.system(command) 210 check_wakeup(estimated_alarm) 211 return estimated_alarm 212 213 214def suspend_for(time_in_suspend, delay_seconds=0): 215 """Suspend using the power manager and spend |time_in_suspend| in suspend. 216 217 Wait for |delay_seconds|, suspend the system(S3/S0iX) using 218 powerd_dbus_suspend program. power manager sets a wakeup alarm on the dut 219 for |time_in_suspend| just before suspending 220 (writing power state to /sys/power/state). Thus the device spends most of 221 |time_in_suspend| in S0iX/S3. 222 223 Function will block until suspend/resume has completed or failed. 224 Returns the wake alarm time from the RTC as epoch. 225 226 @param time_in_suspend: Number of seconds to suspend the DUT. 227 @param delay_seconds: Number of seconds wait before suspending the DUT. 228 """ 229 pause_check_network_hook() 230 231 wakeup_count = read_wakeup_count() 232 upstart.ensure_running('powerd') 233 command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 ' 234 '--wakeup_count=%d --suspend_for_sec=%d' % 235 (delay_seconds, wakeup_count, time_in_suspend)) 236 logging.info("Running '%s'", command) 237 os.system(command) 238 239 240def suspend_bg_for_dark_resume(suspend_seconds, delay_seconds=0): 241 """Do a non-blocking suspend using power manager. 242 243 Wait for |delay_seconds|, then suspend with an rtc alarm for 244 waking up again after having been suspended for |suspend_seconds|, using 245 the powerd_dbus_suspend program all in the background. 246 247 @param suspend_seconds: Number of seconds to suspend the DUT. 248 @param delay_seconds: Number of seconds wait before suspending the DUT. 249 250 """ 251 upstart.ensure_running('powerd') 252 # Disarm any existing wake alarms so as to prevent early wakeups. 253 os.system('echo 0 > /sys/class/rtc/rtc0/wakealarm') 254 wakeup_count = read_wakeup_count() 255 command = ('/usr/bin/powerd_dbus_suspend --delay=%d --timeout=30 ' 256 '--wakeup_count=%d --wakeup_timeout=%d ' 257 '--disable_dark_resume=false' % 258 (delay_seconds, wakeup_count, suspend_seconds)) 259 logging.info("Running '%s'", command) 260 process = multiprocessing.Process(target=os.system, args=(command,)) 261 process.start() 262 263 264def kernel_suspend(seconds, state='mem'): 265 """Do a kernel suspend. 266 267 Suspend the system to @state, waking up again after |seconds|, by directly 268 writing to /sys/power/state. Function will block until suspend/resume has 269 completed or failed. 270 271 @param seconds: The number of seconds to suspend the device. 272 @param state: power state to suspend to. DEFAULT mem. 273 """ 274 alarm, wakeup_count = prepare_wakeup(seconds) 275 rtc.set_wake_alarm(alarm) 276 logging.debug('Saving wakeup count: %d', wakeup_count) 277 write_wakeup_count(wakeup_count) 278 try: 279 logging.info('Suspending at %d', rtc.get_seconds()) 280 with open(SYSFS_POWER_STATE, 'w') as sysfs_file: 281 sysfs_file.write(state) 282 except IOError as e: 283 logging.exception('Writing to %s failed', SYSFS_POWER_STATE) 284 if e.errno == errno.EBUSY and rtc.get_seconds() >= alarm: 285 # The kernel returns EBUSY if it has to abort because 286 # another wakeup fires before we've reached suspend. 287 raise SpuriousWakeupError('Received spurious wakeup in kernel.') 288 else: 289 # Some driver probably failed to suspend properly. 290 # A hint as to what failed is in errno. 291 raise KernelError('Suspend failed: %s' % e.strerror) 292 else: 293 logging.info('Woke from suspend at %d', rtc.get_seconds()) 294 logging.debug('New wakeup count: %d', read_wakeup_count()) 295 check_wakeup(alarm) 296 297 298def idle_suspend(seconds): 299 """ 300 Wait for the system to suspend to RAM (S3), scheduling the RTC to wake up 301 |seconds| after this function was called. Caller must ensure that the system 302 will idle-suspend in time for this to happen. Returns the wake alarm time 303 from the RTC as epoch. 304 305 @param seconds: The number of seconds before wakeup. 306 """ 307 alarm, _ = prepare_wakeup(seconds) 308 rtc.set_wake_alarm(alarm) 309 while rtc.get_seconds() < alarm: 310 time.sleep(0.2) 311 312 # tell powerd something happened, or it will immediately idle-suspend again 313 # TODO: switch to cros.power_utils#call_powerd_dbus_method once this 314 # horrible mess with the WiFi tests and this file's imports is solved 315 logging.debug('Simulating user activity after idle suspend...') 316 os.system('dbus-send --type=method_call --system ' 317 '--dest=org.chromium.PowerManager /org/chromium/PowerManager ' 318 'org.chromium.PowerManager.HandleUserActivity') 319 320 return alarm 321 322 323def memory_suspend(seconds, size=0): 324 """Do a memory suspend. 325 326 Suspend the system to RAM (S3), waking up again after |seconds|, using 327 the memory_suspend_test tool. Function will block until suspend/resume has 328 completed or failed. 329 330 @param seconds: The number of seconds to suspend the device. 331 @param size: Amount of memory to allocate, in bytes. 332 Set to 0 to let memory_suspend_test determine amount of memory. 333 """ 334 # since we cannot have utils.system_output in here, we need a workaround 335 output = '/tmp/memory_suspend_output' 336 wakeup_count = read_wakeup_count() 337 status = os.system('/usr/bin/memory_suspend_test --wakeup_count=%d ' 338 '--size=%d > %s --suspend_for_sec=%d' % 339 (wakeup_count, size, output, seconds)) 340 status = os.WEXITSTATUS(status) 341 if status == 2: 342 logging.error('memory_suspend_test found the following errors:') 343 for line in open(output, 'r'): 344 logging.error(line) 345 raise MemoryError('Memory corruption found after resume') 346 elif status == 1: 347 raise SuspendFailure('Failure in powerd_suspend during memory test') 348 elif status: 349 raise SuspendFailure('Unknown failure in memory_suspend_test (crash?)') 350 351 352def read_wakeup_count(): 353 """ 354 Retrieves the current value of /sys/power/wakeup_count. 355 """ 356 with open(SYSFS_WAKEUP_COUNT) as sysfs_file: 357 return int(sysfs_file.read().rstrip('\n')) 358 359 360def write_wakeup_count(wakeup_count): 361 """Writes a value to /sys/power/wakeup_count. 362 363 @param wakeup_count: The wakeup count value to write. 364 """ 365 try: 366 with open(SYSFS_WAKEUP_COUNT, 'w') as sysfs_file: 367 sysfs_file.write(str(wakeup_count)) 368 except IOError as e: 369 if (e.errno == errno.EINVAL and read_wakeup_count() != wakeup_count): 370 raise SpuriousWakeupError('wakeup_count changed before suspend') 371 else: 372 raise 373