1# Copyright 2017 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import logging 6import pprint 7import time 8import re 9 10from autotest_lib.client.common_lib import error 11from autotest_lib.server import autotest 12from autotest_lib.server.cros.faft.cr50_test import Cr50Test 13 14 15class firmware_Cr50DeviceState(Cr50Test): 16 """Verify Cr50 tracks the EC and AP state correctly. 17 18 Put the device through S0, S0ix, S3, and G3. Cr50 responds to these state 19 changes by enabling/disabling uart and changing its suspend type. Verify 20 that none of these cause any interrupt storms on Cr50. Make sure that there 21 aren't any interrupt storms and that Cr50 enters regular or deep sleep a 22 reasonable amount of times. 23 """ 24 version = 1 25 26 DEEP_SLEEP_STEP_SUFFIX = ' Num Deep Sleep Steps' 27 28 KEY_CMD_END_TIME = -4 29 # Use negative numbers to keep track of counts not in the IRQ list. 30 KEY_DEEP_SLEEP = -3 31 KEY_TIME = -2 32 KEY_RESET = -1 33 KEY_REGULAR_SLEEP = 112 34 INT_NAME = { 35 KEY_RESET : 'Reset Count', 36 KEY_DEEP_SLEEP : 'Deep Sleep Count', 37 KEY_TIME : 'Cr50 Time', 38 4 : 'HOST_CMD_DONE', 39 81 : 'GPIO0', 40 98 : 'GPIO1', 41 103 : 'I2CS WRITE', 42 KEY_REGULAR_SLEEP : 'PMU WAKEUP', 43 113 : 'AC present FED', 44 114 : 'AC present RED', 45 124 : 'RBOX_INTR_PWRB', 46 130 : 'SPS CS deassert', 47 138 : 'SPS RXFIFO LVL', 48 159 : 'SPS RXFIFO overflow', 49 160 : 'EVENT TIMER', 50 174 : 'CR50_RX_SERVO_TX', 51 177 : 'CR50_TX_SERVO_RX', 52 181 : 'AP_TX_CR50_RX', 53 184 : 'AP_RX_CR50_TX', 54 188 : 'EC_TX_CR50_RX', 55 191 : 'EC_RX_CR50_TX', 56 193 : 'USB', 57 } 58 IGNORED_KEYS = [KEY_CMD_END_TIME] 59 SLEEP_KEYS = [ KEY_REGULAR_SLEEP, KEY_DEEP_SLEEP ] 60 # USB, AP UART, and EC UART should be disabled if ccd is disabled. 61 CCD_KEYS = [ 181, 184, 188, 191, 193 ] 62 63 # Cr50 won't enable any form of sleep until it has been up for 20 seconds. 64 SLEEP_DELAY = 20 65 # The time in seconds to wait in each state. Wait one minute so it's long 66 # enough for cr50 to settle into whatever state. 60 seconds is also long 67 # enough that cr50 has enough time to enter deep sleep twice, so we can 68 # catch extra wakeups. 69 SLEEP_TIME = 60 70 SHORT_WAIT = 5 71 CONSERVATIVE_WAIT_TIME = SLEEP_TIME * 2 72 # Cr50 should wake up twice per second while in regular sleep 73 SLEEP_RATE = 2 74 75 DEEP_SLEEP_MAX = 2 76 ARM = 'ARM ' 77 # If there are over 100,000 interrupts, it is an interrupt storm. 78 DEFAULT_COUNTS = [0, 100000] 79 # A dictionary of ok count values for each irq that shouldn't follow the 80 # DEFAULT_COUNTS range. 81 EXPECTED_IRQ_COUNT_RANGE = { 82 KEY_RESET : [0, 0], 83 KEY_DEEP_SLEEP : [0, DEEP_SLEEP_MAX], 84 KEY_TIME : [0, CONSERVATIVE_WAIT_TIME], 85 'S0ix' + DEEP_SLEEP_STEP_SUFFIX : [0, 0], 86 # Cr50 may enter deep sleep an extra time, because of how the test 87 # collects taskinfo counts. Just verify that it does enter deep sleep 88 'S3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 89 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 90 # ARM devices don't enter deep sleep in S3 91 ARM + 'S3' + DEEP_SLEEP_STEP_SUFFIX : [0, 0], 92 ARM + 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2], 93 # Regular sleep is calculated based on the cr50 time 94 } 95 96 # Each line relevant taskinfo output should be 13 characters long with only 97 # digits or spaces. Use this information to make sure every taskinfo command 98 # gets the full relevant output. There are 4 characters for the irq number 99 # and 9 for the count. 100 GET_TASKINFO = ['IRQ counts by type:\s+(([\d ]{13}\r\n)+)Service calls'] 101 102 START = '' 103 INCREASE = '+' 104 DS_RESUME = 'DS' 105 106 TMP_POWER_MANAGER_PATH = '/tmp/power_manager' 107 POWER_MANAGER_PATH = '/var/lib/power_manager' 108 # TODO(mruthven): remove ec chan restriction once soraka stops spamming host 109 # command output. The extra activity makes it look like a interrupt storm on 110 # the EC uart. 111 CHAN_ALL = 0xffffffff 112 CHAN_EVENTS = 0x20 113 CHAN_ACPI = 0x400 114 CHAN_HOSTCMD = 0x80 115 CHAN_USBCHARGE = 0x200000 116 CHAN_RESTRICTED = CHAN_ALL ^ (CHAN_EVENTS | CHAN_ACPI | CHAN_HOSTCMD 117 | CHAN_USBCHARGE) 118 119 120 def initialize(self, host, cmdline_args, full_args): 121 super(firmware_Cr50DeviceState, self).initialize(host, cmdline_args, 122 full_args) 123 # Don't bother if there is no Chrome EC or if EC hibernate doesn't work. 124 if not self.check_ec_capability(): 125 raise error.TestNAError("Nothing needs to be tested on this device") 126 127 # If the TPM is reset in S0i3, the CR50 may enter deep sleep during S0i3. 128 # Cr50 may enter deep sleep an extra time, because of how the test 129 # collects taskinfo counts. So the range is set conservatively to 0-2. 130 if self.check_cr50_capability(['deep_sleep_in_s0i3']): 131 irq_s0ix_deep_sleep_key = 'S0ix' + self.DEEP_SLEEP_STEP_SUFFIX 132 self.EXPECTED_IRQ_COUNT_RANGE[irq_s0ix_deep_sleep_key] = [0, 2] 133 134 def mount_power_config(self): 135 """Mounts power_manager settings to tmp, 136 ensuring that any changes do not persist across reboots 137 """ 138 self.faft_client.system.run_shell_command( 139 'mkdir -p %s && \ 140 echo 0 > %s/suspend_to_idle && \ 141 mount --bind %s %s && \ 142 restart powerd' % 143 (self.TMP_POWER_MANAGER_PATH, self.TMP_POWER_MANAGER_PATH, 144 self.TMP_POWER_MANAGER_PATH, self.POWER_MANAGER_PATH), True) 145 146 def umount_power_config(self): 147 """Unmounts power_manager settings""" 148 self.faft_client.system.run_shell_command( 149 'umount %s && restart powerd' % self.POWER_MANAGER_PATH, True) 150 151 def set_suspend_to_idle(self, value): 152 """Set suspend_to_idle by writing to power_manager settings""" 153 # Suspend to idle expects 0/1 so %d is used 154 self.faft_client.system.run_shell_command( 155 'echo %d > %s/suspend_to_idle' % 156 (value, self.TMP_POWER_MANAGER_PATH), True) 157 158 159 def log_sleep_debug_information(self): 160 """Log some information used for debugging sleep issues""" 161 logging.debug( 162 self.cr50.send_command_retry_get_output('sleepmask', 163 ['sleepmask.*>'], 164 safe=True)[0]) 165 logging.debug( 166 self.cr50.send_command_retry_get_output('sysinfo', 167 ['sysinfo.*>'], 168 safe=True)[0]) 169 170 171 def get_taskinfo_output(self): 172 """Return a dict with the irq numbers as keys and counts as values""" 173 output = self.cr50.send_command_retry_get_output('taskinfo', 174 self.GET_TASKINFO, safe=True, retries=10)[0][1].strip() 175 logging.debug(output) 176 return output 177 178 179 def get_irq_counts(self): 180 """Return a dict with the irq numbers as keys and counts as values""" 181 irq_counts = { self.KEY_REGULAR_SLEEP : 0 } 182 # Running all of these commands may take a while. Track how much time 183 # commands are running, so we can offset the cr50 time to set sleep 184 # expectations 185 start_cmd_time = int(self.cr50.gettime()) 186 irq_counts[self.KEY_TIME] = start_cmd_time 187 188 output = self.get_taskinfo_output() 189 irq_list = re.findall('\d+\s+\d+\r', output) 190 # Make sure the regular sleep irq is in the dictionary, even if cr50 191 # hasn't seen any interrupts. It's important the test sees that there's 192 # never an interrupt. 193 for irq_info in irq_list: 194 logging.debug(irq_info) 195 num, count = irq_info.split() 196 irq_counts[int(num)] = int(count) 197 irq_counts[self.KEY_RESET] = int(self.cr50.get_reset_count()) 198 irq_counts[self.KEY_DEEP_SLEEP] = int(self.cr50.get_deep_sleep_count()) 199 # Log some information, so we can debug issues with sleep. 200 self.log_sleep_debug_information() 201 # Track when the commands end, so the test can ignore the time spent 202 # running these console commands. 203 end_cmd_time = int(self.cr50.gettime()) 204 irq_counts[self.KEY_CMD_END_TIME] = end_cmd_time 205 logging.info('Commands finished in %d seconds', 206 end_cmd_time - start_cmd_time) 207 return irq_counts 208 209 210 def get_expected_count(self, irq_key, cr50_time, idle): 211 """Get the expected irq increase for the given irq and state 212 213 Args: 214 irq_key: the irq int 215 cr50_time: the cr50 time in seconds 216 idle: Cr50 should enter regular sleep while the device is idle. 217 218 Returns: 219 A list with the expected irq count range [min, max] 220 """ 221 # CCD will prevent sleep 222 if self.ccd_enabled and (irq_key in self.SLEEP_KEYS or 223 self.DEEP_SLEEP_STEP_SUFFIX in str(irq_key)): 224 return [0, 0] 225 if irq_key == self.KEY_REGULAR_SLEEP: 226 # If cr50_time is really low, we probably woke cr50 up using 227 # taskinfo, which would be a pmu wakeup. 228 if cr50_time < 5: 229 return [0, 1] 230 # Only enforce the minimum regular sleep count if the device is 231 # idle. Cr50 may not enter regular sleep during power state 232 # transitions. 233 if idle: 234 min_count = max(cr50_time - self.SLEEP_DELAY, 0) 235 else: 236 min_count = 0 237 # Check that cr50 isn't continuously entering and exiting sleep. 238 # The PMU wakeups should happen around twice a second. Depending 239 # on TPM activity it may occur more often. Add 2 to the multiplier 240 # to allow for extra wakeups. This is mostly to catch issues that 241 # cause cr50 to wake up 100 times a second 242 max_count = cr50_time * (self.SLEEP_RATE + 2) 243 return [min_count, max_count] 244 # If ccd is disabled, ccd irq counts should not increase. 245 if not self.ccd_enabled and (irq_key in self.CCD_KEYS): 246 return [0, 0] 247 return self.EXPECTED_IRQ_COUNT_RANGE.get(irq_key, self.DEFAULT_COUNTS) 248 249 250 def check_increase(self, irq_key, name, increase, expected_range): 251 """Verify the irq count is within the expected range 252 253 Args: 254 irq_key: the irq int 255 name: the irq name string 256 increase: the irq count 257 expected_range: A list with the valid irq count range [min, max] 258 259 Returns: 260 '' if increase is in the given range. If the increase isn't in the 261 range, it returns an error message. 262 """ 263 min_count, max_count = expected_range 264 if min_count > increase or max_count < increase: 265 err_msg = '%s %s: %s not in range %s' % (name, irq_key, increase, 266 expected_range) 267 return err_msg 268 return '' 269 270 271 def get_step_events(self): 272 """Use the deep sleep counts to determine the step events""" 273 ds_counts = self.get_irq_step_counts(self.KEY_DEEP_SLEEP) 274 events = [] 275 for i, count in enumerate(ds_counts): 276 if not i: 277 events.append(self.START) 278 elif count != ds_counts[i - 1]: 279 # If the deep sleep count changed, Cr50 recovered deep sleep 280 # and the irq counts are reset. 281 events.append(self.DS_RESUME) 282 else: 283 events.append(self.INCREASE) 284 return events 285 286 287 def get_irq_step_counts(self, irq_key): 288 """Get a list of the all of the step counts for the given irq""" 289 return [ irq_dict.get(irq_key, 0) for irq_dict in self.steps ] 290 291 292 def check_for_errors(self, state): 293 """Check for unexpected IRQ counts at each step. 294 295 Find the irq count errors and add them to run_errors. 296 297 Args: 298 state: The power state: S0, S0ix, S3, or G3. 299 """ 300 num_steps = len(self.steps) 301 # Get all of the deep sleep counts 302 events = self.get_step_events() 303 304 irq_list = list(self.irqs) 305 irq_list.sort() 306 307 # Pad the start of the step names, so the names align with each step 308 # count. 309 irq_diff = ['%24s|%s|' % ('', '|'.join(self.step_names))] 310 step_errors = [ [] for i in range(num_steps) ] 311 312 end_cmd_times = self.get_irq_step_counts(self.KEY_CMD_END_TIME) 313 start_cmd_times = self.get_irq_step_counts(self.KEY_TIME) 314 cr50_time_diff = [] 315 for i, start_time in enumerate(start_cmd_times): 316 # The test collects a lot of information using console commands. 317 # These take a long time to run and cr50 isn't idle. The idle 318 # time is the time this step started minus the time the last set of 319 # commands finished running. 320 if events[i] == self.INCREASE: 321 cr50_time_diff.append(start_time - end_cmd_times[i - 1]) 322 else: 323 cr50_time_diff.append(start_time) 324 325 # Go through each irq and update its info in the progress dict 326 for irq_key in irq_list: 327 if irq_key in self.IGNORED_KEYS: 328 continue 329 name = self.INT_NAME.get(irq_key, 'Unknown') 330 # Print the IRQ name on the left of the column and the irq number 331 # on the right. 332 irq_progress_str = ['%-19s %3s' % (name, irq_key)] 333 334 irq_counts = self.get_irq_step_counts(irq_key) 335 for step, count in enumerate(irq_counts): 336 event = events[step] 337 338 # The deep sleep counts are not reset after deep sleep. Change 339 # the event to INCREASE. 340 if irq_key == self.KEY_DEEP_SLEEP and event == self.DS_RESUME: 341 event = self.INCREASE 342 343 if event == self.INCREASE: 344 count -= irq_counts[step - 1] 345 346 # Check that the count increase is within the expected value. 347 if event != self.START: 348 step_name = self.step_names[step].strip() 349 # TODO(b/153891388): raise actual error once the servo 350 # character loss issue is fixed. 351 if count < 0: 352 raise error.TestNAError('%s test found negative %s ' 353 'count %r (likely due to servo ' 354 'dropping characters)' % 355 (step, step_name, count)) 356 expected_range = self.get_expected_count(irq_key, 357 cr50_time_diff[step], idle='idle' in step_name) 358 359 rv = self.check_increase(irq_key, name, count, 360 expected_range) 361 if rv: 362 logging.info('Unexpected count in %s test: %s %s', 363 state, step_name, rv) 364 step_errors[step].append(rv) 365 366 irq_progress_str.append(' %2s %8d' % (event, count)) 367 368 # Separate each step count with '|'. Add '|' to the start and end of 369 # the line. 370 irq_diff.append('|%s|' % '|'.join(irq_progress_str)) 371 372 errors = {} 373 374 ds_key = self.ARM if self.is_arm else '' 375 ds_key += state + self.DEEP_SLEEP_STEP_SUFFIX 376 expected_range = self.get_expected_count(ds_key, 0, False) 377 rv = self.check_increase(None, ds_key, events.count(self.DS_RESUME), 378 expected_range) 379 if rv: 380 logging.info('Unexpected count for %s %s', state, rv) 381 errors[ds_key] = rv 382 for i, step_error in enumerate(step_errors): 383 if step_error: 384 logging.error('Step %d errors:\n%s', i, 385 pprint.pformat(step_error)) 386 step = '%s step %d %s' % (state, i, self.step_names[i].strip()) 387 errors[step] = step_error 388 389 logging.info('DIFF %s IRQ Counts:\n%s', state, '\n'.join(irq_diff)) 390 if errors: 391 logging.info('ERRORS %s IRQ Counts:\n%s', state, 392 pprint.pformat(errors)) 393 self.run_errors.update(errors) 394 395 396 def ap_is_on_after_power_button_press(self): 397 """Returns True if the AP is on after pressing the power button""" 398 self.servo.power_normal_press() 399 # Give the AP some time to turn on 400 time.sleep(self.cr50.SHORT_WAIT) 401 return self.cr50.ap_is_on() 402 403 404 def trigger_s0(self): 405 """Press the power button so the DUT will wake up.""" 406 if self.ap_is_on_after_power_button_press(): 407 return 408 # Try a second time to see if the AP comes up. 409 if not self.ap_is_on_after_power_button_press(): 410 raise error.TestError('Could not wake the AP using power button') 411 logging.warning('Had to press power button twice to wake the AP') 412 413 414 def enter_state(self, state): 415 """Get the command to enter the power state""" 416 block = True 417 if state == 'S0': 418 self.trigger_s0() 419 # Suppress host command output, so it doesn't look like an interrupt 420 # storm. Set it whenever the system enters S0 to ensure the setting 421 # is restored if the EC enters hibernate. 422 time.sleep(2) 423 logging.info('Setting EC chan %x', self.CHAN_RESTRICTED) 424 self.ec.send_command('chan 0x%x' % self.CHAN_RESTRICTED) 425 else: 426 if state == 'S0ix': 427 self.set_suspend_to_idle(True) 428 self.suspend() 429 elif state == 'S3': 430 self.set_suspend_to_idle(False) 431 self.suspend() 432 elif state == 'G3': 433 self.faft_client.system.run_shell_command('poweroff', True) 434 435 time.sleep(self.SHORT_WAIT) 436 # check state transition 437 if not self.wait_power_state(state, self.SHORT_WAIT): 438 raise error.TestFail('Platform failed to reach %s state.' % state) 439 440 441 def stage_irq_add(self, irq_dict, name=''): 442 """Add the current irq counts to the stored dictionary of irq info""" 443 self.steps.append(irq_dict) 444 self.step_names.append(name.center(12)) 445 self.irqs.update(irq_dict.keys()) 446 logging.info('%s:\n%s', name, pprint.pformat(irq_dict)) 447 448 449 def reset_irq_counts(self): 450 """Reset the test IRQ counts""" 451 self.steps = [] 452 self.step_names = [] 453 self.irqs = set() 454 455 456 def run_transition(self, state): 457 """Enter the given power state and reenter s0 458 459 Enter the power state and return to S0. Wait long enough to ensure cr50 460 will enter sleep mode, so we can verify that as well. 461 462 Args: 463 state: the power state: S0ix, S3, or G3 464 """ 465 self.reset_irq_counts() 466 467 # Enter the given state 468 self.enter_state(state) 469 self.stage_irq_add(self.get_irq_counts(), 'entered %s' % state) 470 471 logging.info('waiting %d seconds', self.SLEEP_TIME) 472 time.sleep(self.SLEEP_TIME) 473 # Nothing is really happening. Cr50 should basically be idle during 474 # SLEEP_TIME. 475 self.stage_irq_add(self.get_irq_counts(), 'idle in %s' % state) 476 477 # Return to S0 478 self.enter_state('S0') 479 self.stage_irq_add(self.get_irq_counts(), 'entered S0') 480 481 logging.info('waiting %d seconds', self.SLEEP_TIME) 482 time.sleep(self.SLEEP_TIME) 483 484 self.stage_irq_add(self.get_irq_counts(), 'idle in S0') 485 486 487 def verify_state(self, state): 488 """Verify cr50 behavior while running through the power state""" 489 490 try: 491 self.run_transition(state) 492 finally: 493 # reset the system to S0 no matter what happens 494 self.trigger_s0() 495 # Reenable EC chan output. 496 time.sleep(2) 497 logging.info('Setting EC chan %x', self.CHAN_ALL) 498 self.ec.send_command('chan 0x%x' % self.CHAN_ALL) 499 500 # Check that the progress of the irq counts seems reasonable 501 self.check_for_errors(state) 502 503 504 def is_arm_family(self): 505 """Returns True if the DUT is an ARM device.""" 506 arch = self.host.run('arch').stdout.strip() 507 return arch in ['aarch64', 'armv7l'] 508 509 510 def run_through_power_states(self): 511 """Go through S0ix, S3, and G3. Verify there are no interrupt storms""" 512 self._try_to_bring_dut_up() 513 self.run_errors = {} 514 self.ccd_str = 'ccd ' + ('enabled' if self.ccd_enabled else 'disabled') 515 logging.info('Running through states with %s', self.ccd_str) 516 517 self.cr50.get_ccdstate() 518 if not self.cr50.get_sleepmask() and self.ccd_enabled: 519 logging.info('Sleepmask is not keeping cr50 up with ccd enabled') 520 self.all_errors[self.ccd_str] = 'usb is not active with ccd enabled' 521 return 522 523 # Login before entering S0ix so cr50 will be able to enter regular sleep 524 client_at = autotest.Autotest(self.host) 525 client_at.run_test('login_LoginSuccess') 526 527 self.mount_power_config() 528 try: 529 if self.s0ix_supported: 530 self.verify_state('S0ix') 531 532 if self.s3_supported: 533 self.verify_state('S3') 534 finally: 535 self.umount_power_config() 536 537 # Enter G3 538 self.verify_state('G3') 539 if self.run_errors: 540 self.all_errors[self.ccd_str] = self.run_errors 541 542 543 def run_once(self, host): 544 """Go through S0ix, S3, and G3. Verify there are no interrupt storms""" 545 self.all_errors = {} 546 self.host = host 547 self.is_arm = self.is_arm_family() 548 supports_dts_control = self.cr50.servo_dts_mode_is_valid() 549 550 if supports_dts_control: 551 self.cr50.ccd_disable(raise_error=True) 552 553 self.ccd_enabled = self.cr50.ccd_is_enabled() 554 # Check if the device supports S0ix. 555 self.s0ix_supported = not self.host.run( 556 'check_powerd_config --suspend_to_idle', 557 ignore_status=True).exit_status 558 # Check if the device supports S3. 559 self.s3_supported = not self.host.run( 560 'grep -q deep /sys/power/mem_sleep', 561 ignore_status=True).exit_status 562 563 self.run_through_power_states() 564 565 if supports_dts_control: 566 ccd_was_enabled = self.ccd_enabled 567 self.cr50.ccd_enable(raise_error=supports_dts_control) 568 self.ccd_enabled = self.cr50.ccd_is_enabled() 569 # If the first run had ccd disabled, and the test was able to enable 570 # ccd, run through the states again to make sure there are no issues 571 # come up when ccd is enabled. 572 if not ccd_was_enabled and self.ccd_enabled: 573 self.run_through_power_states() 574 else: 575 logging.info('Current setup only supports test with ccd %sabled.', 576 'en' if self.ccd_enabled else 'dis') 577 578 self.trigger_s0() 579 if self.all_errors: 580 raise error.TestFail('Unexpected Device State: %s' % 581 self.all_errors) 582 if not supports_dts_control: 583 raise error.TestNAError('Verified device state with %s. Please ' 584 'run with type c servo v4 to test full device state.' % 585 self.ccd_str) 586