1# Copyright 2017 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import pprint
7import time
8import re
9
10from autotest_lib.client.common_lib import error
11from autotest_lib.server import autotest
12from autotest_lib.server.cros.faft.cr50_test import Cr50Test
13
14
15class firmware_Cr50DeviceState(Cr50Test):
16    """Verify Cr50 tracks the EC and AP state correctly.
17
18    Put the device through S0, S0ix, S3, and G3. Cr50 responds to these state
19    changes by enabling/disabling uart and changing its suspend type. Verify
20    that none of these cause any interrupt storms on Cr50. Make sure that there
21    aren't any interrupt storms and that Cr50 enters regular or deep sleep a
22    reasonable amount of times.
23    """
24    version = 1
25
26    DEEP_SLEEP_STEP_SUFFIX = ' Num Deep Sleep Steps'
27
28    KEY_CMD_END_TIME = -4
29    # Use negative numbers to keep track of counts not in the IRQ list.
30    KEY_DEEP_SLEEP = -3
31    KEY_TIME = -2
32    KEY_RESET = -1
33    KEY_REGULAR_SLEEP = 112
34    INT_NAME = {
35        KEY_RESET  : 'Reset Count',
36        KEY_DEEP_SLEEP  : 'Deep Sleep Count',
37        KEY_TIME  : 'Cr50 Time',
38        4 : 'HOST_CMD_DONE',
39        81  : 'GPIO0',
40        98  : 'GPIO1',
41        103 : 'I2CS WRITE',
42        KEY_REGULAR_SLEEP : 'PMU WAKEUP',
43        113 : 'AC present FED',
44        114 : 'AC present RED',
45        124 : 'RBOX_INTR_PWRB',
46        130 : 'SPS CS deassert',
47        138 : 'SPS RXFIFO LVL',
48        159 : 'SPS RXFIFO overflow',
49        160 : 'EVENT TIMER',
50        174 : 'CR50_RX_SERVO_TX',
51        177 : 'CR50_TX_SERVO_RX',
52        181 : 'AP_TX_CR50_RX',
53        184 : 'AP_RX_CR50_TX',
54        188 : 'EC_TX_CR50_RX',
55        191 : 'EC_RX_CR50_TX',
56        193 : 'USB',
57    }
58    IGNORED_KEYS = [KEY_CMD_END_TIME]
59    SLEEP_KEYS = [ KEY_REGULAR_SLEEP, KEY_DEEP_SLEEP ]
60    # USB, AP UART, and EC UART should be disabled if ccd is disabled.
61    CCD_KEYS = [ 181, 184, 188, 191, 193 ]
62
63    # Cr50 won't enable any form of sleep until it has been up for 20 seconds.
64    SLEEP_DELAY = 20
65    # The time in seconds to wait in each state. Wait one minute so it's long
66    # enough for cr50 to settle into whatever state. 60 seconds is also long
67    # enough that cr50 has enough time to enter deep sleep twice, so we can
68    # catch extra wakeups.
69    SLEEP_TIME = 60
70    SHORT_WAIT = 5
71    CONSERVATIVE_WAIT_TIME = SLEEP_TIME * 2
72    # Cr50 should wake up twice per second while in regular sleep
73    SLEEP_RATE = 2
74
75    DEEP_SLEEP_MAX = 2
76    ARM = 'ARM '
77    # If there are over 100,000 interrupts, it is an interrupt storm.
78    DEFAULT_COUNTS = [0, 100000]
79    # A dictionary of ok count values for each irq that shouldn't follow the
80    # DEFAULT_COUNTS range.
81    EXPECTED_IRQ_COUNT_RANGE = {
82        KEY_RESET : [0, 0],
83        KEY_DEEP_SLEEP : [0, DEEP_SLEEP_MAX],
84        KEY_TIME : [0, CONSERVATIVE_WAIT_TIME],
85        'S0ix' + DEEP_SLEEP_STEP_SUFFIX : [0, 0],
86        # Cr50 may enter deep sleep an extra time, because of how the test
87        # collects taskinfo counts. Just verify that it does enter deep sleep
88        'S3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
89        'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
90        # ARM devices don't enter deep sleep in S3
91        ARM + 'S3' + DEEP_SLEEP_STEP_SUFFIX : [0, 0],
92        ARM + 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
93        # Regular sleep is calculated based on the cr50 time
94    }
95
96    # Each line relevant taskinfo output should be 13 characters long with only
97    # digits or spaces. Use this information to make sure every taskinfo command
98    # gets the full relevant output. There are 4 characters for the irq number
99    # and 9 for the count.
100    GET_TASKINFO = ['IRQ counts by type:\s+(([\d ]{13}\r\n)+)Service calls']
101
102    START = ''
103    INCREASE = '+'
104    DS_RESUME = 'DS'
105
106    TMP_POWER_MANAGER_PATH = '/tmp/power_manager'
107    POWER_MANAGER_PATH = '/var/lib/power_manager'
108    # TODO(mruthven): remove ec chan restriction once soraka stops spamming host
109    # command output. The extra activity makes it look like a interrupt storm on
110    # the EC uart.
111    CHAN_ALL = 0xffffffff
112    CHAN_EVENTS = 0x20
113    CHAN_ACPI = 0x400
114    CHAN_HOSTCMD = 0x80
115    CHAN_USBCHARGE = 0x200000
116    CHAN_RESTRICTED = CHAN_ALL ^ (CHAN_EVENTS | CHAN_ACPI | CHAN_HOSTCMD
117                                  | CHAN_USBCHARGE)
118
119
120    def initialize(self, host, cmdline_args, full_args):
121        super(firmware_Cr50DeviceState, self).initialize(host, cmdline_args,
122                                                         full_args)
123        # Don't bother if there is no Chrome EC or if EC hibernate doesn't work.
124        if not self.check_ec_capability():
125            raise error.TestNAError("Nothing needs to be tested on this device")
126
127        # If the TPM is reset in S0i3, the CR50 may enter deep sleep during S0i3.
128        # Cr50 may enter deep sleep an extra time, because of how the test
129        # collects taskinfo counts. So the range is set conservatively to 0-2.
130        if self.check_cr50_capability(['deep_sleep_in_s0i3']):
131            irq_s0ix_deep_sleep_key = 'S0ix' + self.DEEP_SLEEP_STEP_SUFFIX
132            self.EXPECTED_IRQ_COUNT_RANGE[irq_s0ix_deep_sleep_key] = [0, 2]
133
134    def mount_power_config(self):
135        """Mounts power_manager settings to tmp,
136        ensuring that any changes do not persist across reboots
137        """
138        self.faft_client.system.run_shell_command(
139                'mkdir -p %s && \
140            echo 0 > %s/suspend_to_idle && \
141            mount --bind %s %s && \
142            restart powerd' %
143                (self.TMP_POWER_MANAGER_PATH, self.TMP_POWER_MANAGER_PATH,
144                 self.TMP_POWER_MANAGER_PATH, self.POWER_MANAGER_PATH), True)
145
146    def umount_power_config(self):
147        """Unmounts power_manager settings"""
148        self.faft_client.system.run_shell_command(
149                'umount %s && restart powerd' % self.POWER_MANAGER_PATH, True)
150
151    def set_suspend_to_idle(self, value):
152        """Set suspend_to_idle by writing to power_manager settings"""
153        # Suspend to idle expects 0/1 so %d is used
154        self.faft_client.system.run_shell_command(
155                'echo %d > %s/suspend_to_idle' %
156                (value, self.TMP_POWER_MANAGER_PATH), True)
157
158
159    def log_sleep_debug_information(self):
160        """Log some information used for debugging sleep issues"""
161        logging.debug(
162            self.cr50.send_command_retry_get_output('sleepmask',
163                                                    ['sleepmask.*>'],
164                                                    safe=True)[0])
165        logging.debug(
166            self.cr50.send_command_retry_get_output('sysinfo',
167                                                    ['sysinfo.*>'],
168                                                    safe=True)[0])
169
170
171    def get_taskinfo_output(self):
172        """Return a dict with the irq numbers as keys and counts as values"""
173        output = self.cr50.send_command_retry_get_output('taskinfo',
174            self.GET_TASKINFO, safe=True, retries=10)[0][1].strip()
175        logging.debug(output)
176        return output
177
178
179    def get_irq_counts(self):
180        """Return a dict with the irq numbers as keys and counts as values"""
181        irq_counts = { self.KEY_REGULAR_SLEEP : 0 }
182        # Running all of these commands may take a while. Track how much time
183        # commands are running, so we can offset the cr50 time to set sleep
184        # expectations
185        start_cmd_time = int(self.cr50.gettime())
186        irq_counts[self.KEY_TIME] = start_cmd_time
187
188        output = self.get_taskinfo_output()
189        irq_list = re.findall('\d+\s+\d+\r', output)
190        # Make sure the regular sleep irq is in the dictionary, even if cr50
191        # hasn't seen any interrupts. It's important the test sees that there's
192        # never an interrupt.
193        for irq_info in irq_list:
194            logging.debug(irq_info)
195            num, count = irq_info.split()
196            irq_counts[int(num)] = int(count)
197        irq_counts[self.KEY_RESET] = int(self.cr50.get_reset_count())
198        irq_counts[self.KEY_DEEP_SLEEP] = int(self.cr50.get_deep_sleep_count())
199        # Log some information, so we can debug issues with sleep.
200        self.log_sleep_debug_information()
201        # Track when the commands end, so the test can ignore the time spent
202        # running these console commands.
203        end_cmd_time = int(self.cr50.gettime())
204        irq_counts[self.KEY_CMD_END_TIME] = end_cmd_time
205        logging.info('Commands finished in %d seconds',
206                     end_cmd_time - start_cmd_time)
207        return irq_counts
208
209
210    def get_expected_count(self, irq_key, cr50_time, idle):
211        """Get the expected irq increase for the given irq and state
212
213        Args:
214            irq_key: the irq int
215            cr50_time: the cr50 time in seconds
216            idle: Cr50 should enter regular sleep while the device is idle.
217
218        Returns:
219            A list with the expected irq count range [min, max]
220        """
221        # CCD will prevent sleep
222        if self.ccd_enabled and (irq_key in self.SLEEP_KEYS or
223            self.DEEP_SLEEP_STEP_SUFFIX in str(irq_key)):
224            return [0, 0]
225        if irq_key == self.KEY_REGULAR_SLEEP:
226            # If cr50_time is really low, we probably woke cr50 up using
227            # taskinfo, which would be a pmu wakeup.
228            if cr50_time < 5:
229                return [0, 1]
230            # Only enforce the minimum regular sleep count if the device is
231            # idle. Cr50 may not enter regular sleep during power state
232            # transitions.
233            if idle:
234                min_count = max(cr50_time - self.SLEEP_DELAY, 0)
235            else:
236                min_count = 0
237            # Check that cr50 isn't continuously entering and exiting sleep.
238            # The PMU wakeups should happen around twice a second. Depending
239            # on TPM activity it may occur more often. Add 2 to the multiplier
240            # to allow for extra wakeups. This is mostly to catch issues that
241            # cause cr50 to wake up 100 times a second
242            max_count = cr50_time * (self.SLEEP_RATE + 2)
243            return [min_count, max_count]
244        # If ccd is disabled, ccd irq counts should not increase.
245        if not self.ccd_enabled and (irq_key in self.CCD_KEYS):
246            return [0, 0]
247        return self.EXPECTED_IRQ_COUNT_RANGE.get(irq_key, self.DEFAULT_COUNTS)
248
249
250    def check_increase(self, irq_key, name, increase, expected_range):
251        """Verify the irq count is within the expected range
252
253        Args:
254            irq_key: the irq int
255            name: the irq name string
256            increase: the irq count
257            expected_range: A list with the valid irq count range [min, max]
258
259        Returns:
260            '' if increase is in the given range. If the increase isn't in the
261            range, it returns an error message.
262        """
263        min_count, max_count = expected_range
264        if min_count > increase or max_count < increase:
265            err_msg = '%s %s: %s not in range %s' % (name, irq_key, increase,
266                expected_range)
267            return err_msg
268        return ''
269
270
271    def get_step_events(self):
272        """Use the deep sleep counts to determine the step events"""
273        ds_counts = self.get_irq_step_counts(self.KEY_DEEP_SLEEP)
274        events = []
275        for i, count in enumerate(ds_counts):
276            if not i:
277                events.append(self.START)
278            elif count != ds_counts[i - 1]:
279                # If the deep sleep count changed, Cr50 recovered deep sleep
280                # and the irq counts are reset.
281                events.append(self.DS_RESUME)
282            else:
283                events.append(self.INCREASE)
284        return events
285
286
287    def get_irq_step_counts(self, irq_key):
288        """Get a list of the all of the step counts for the given irq"""
289        return [ irq_dict.get(irq_key, 0) for irq_dict in self.steps ]
290
291
292    def check_for_errors(self, state):
293        """Check for unexpected IRQ counts at each step.
294
295        Find the irq count errors and add them to run_errors.
296
297        Args:
298            state: The power state: S0, S0ix, S3, or G3.
299        """
300        num_steps = len(self.steps)
301        # Get all of the deep sleep counts
302        events = self.get_step_events()
303
304        irq_list = list(self.irqs)
305        irq_list.sort()
306
307        # Pad the start of the step names, so the names align with each step
308        # count.
309        irq_diff = ['%24s|%s|' % ('', '|'.join(self.step_names))]
310        step_errors = [ [] for i in range(num_steps) ]
311
312        end_cmd_times = self.get_irq_step_counts(self.KEY_CMD_END_TIME)
313        start_cmd_times = self.get_irq_step_counts(self.KEY_TIME)
314        cr50_time_diff = []
315        for i, start_time in enumerate(start_cmd_times):
316            # The test collects a lot of information using console commands.
317            # These take a long time to run and cr50 isn't idle. The idle
318            # time is the time this step started minus the time the last set of
319            # commands finished running.
320            if events[i] == self.INCREASE:
321                cr50_time_diff.append(start_time - end_cmd_times[i - 1])
322            else:
323                cr50_time_diff.append(start_time)
324
325        # Go through each irq and update its info in the progress dict
326        for irq_key in irq_list:
327            if irq_key in self.IGNORED_KEYS:
328                continue
329            name = self.INT_NAME.get(irq_key, 'Unknown')
330            # Print the IRQ name on the left of the column and the irq number
331            # on the right.
332            irq_progress_str = ['%-19s %3s' % (name, irq_key)]
333
334            irq_counts = self.get_irq_step_counts(irq_key)
335            for step, count in enumerate(irq_counts):
336                event = events[step]
337
338                # The deep sleep counts are not reset after deep sleep. Change
339                # the event to INCREASE.
340                if irq_key == self.KEY_DEEP_SLEEP and event == self.DS_RESUME:
341                    event = self.INCREASE
342
343                if event == self.INCREASE:
344                    count -= irq_counts[step - 1]
345
346                # Check that the count increase is within the expected value.
347                if event != self.START:
348                    step_name = self.step_names[step].strip()
349                    # TODO(b/153891388): raise actual error once the servo
350                    # character loss issue is fixed.
351                    if count < 0:
352                        raise error.TestNAError('%s test found negative %s '
353                                                'count %r (likely due to servo '
354                                                'dropping characters)' %
355                                                (step, step_name, count))
356                    expected_range = self.get_expected_count(irq_key,
357                            cr50_time_diff[step], idle='idle' in step_name)
358
359                    rv = self.check_increase(irq_key, name, count,
360                            expected_range)
361                    if rv:
362                        logging.info('Unexpected count in %s test: %s %s',
363                                     state, step_name, rv)
364                        step_errors[step].append(rv)
365
366                irq_progress_str.append(' %2s %8d' % (event, count))
367
368            # Separate each step count with '|'. Add '|' to the start and end of
369            # the line.
370            irq_diff.append('|%s|' % '|'.join(irq_progress_str))
371
372        errors = {}
373
374        ds_key = self.ARM if self.is_arm else ''
375        ds_key += state + self.DEEP_SLEEP_STEP_SUFFIX
376        expected_range = self.get_expected_count(ds_key, 0, False)
377        rv = self.check_increase(None, ds_key, events.count(self.DS_RESUME),
378                expected_range)
379        if rv:
380            logging.info('Unexpected count for %s %s', state, rv)
381            errors[ds_key] = rv
382        for i, step_error in enumerate(step_errors):
383            if step_error:
384                logging.error('Step %d errors:\n%s', i,
385                        pprint.pformat(step_error))
386                step = '%s step %d %s' % (state, i, self.step_names[i].strip())
387                errors[step] = step_error
388
389        logging.info('DIFF %s IRQ Counts:\n%s', state, '\n'.join(irq_diff))
390        if errors:
391            logging.info('ERRORS %s IRQ Counts:\n%s', state,
392                    pprint.pformat(errors))
393            self.run_errors.update(errors)
394
395
396    def ap_is_on_after_power_button_press(self):
397        """Returns True if the AP is on after pressing the power button"""
398        self.servo.power_normal_press()
399        # Give the AP some time to turn on
400        time.sleep(self.cr50.SHORT_WAIT)
401        return self.cr50.ap_is_on()
402
403
404    def trigger_s0(self):
405        """Press the power button so the DUT will wake up."""
406        if self.ap_is_on_after_power_button_press():
407            return
408        # Try a second time to see if the AP comes up.
409        if not self.ap_is_on_after_power_button_press():
410            raise error.TestError('Could not wake the AP using power button')
411        logging.warning('Had to press power button twice to wake the AP')
412
413
414    def enter_state(self, state):
415        """Get the command to enter the power state"""
416        block = True
417        if state == 'S0':
418            self.trigger_s0()
419            # Suppress host command output, so it doesn't look like an interrupt
420            # storm. Set it whenever the system enters S0 to ensure the setting
421            # is restored if the EC enters hibernate.
422            time.sleep(2)
423            logging.info('Setting EC chan %x', self.CHAN_RESTRICTED)
424            self.ec.send_command('chan 0x%x' % self.CHAN_RESTRICTED)
425        else:
426            if state == 'S0ix':
427                self.set_suspend_to_idle(True)
428                self.suspend()
429            elif state == 'S3':
430                self.set_suspend_to_idle(False)
431                self.suspend()
432            elif state == 'G3':
433                self.faft_client.system.run_shell_command('poweroff', True)
434
435        time.sleep(self.SHORT_WAIT)
436        # check state transition
437        if not self.wait_power_state(state, self.SHORT_WAIT):
438            raise error.TestFail('Platform failed to reach %s state.' % state)
439
440
441    def stage_irq_add(self, irq_dict, name=''):
442        """Add the current irq counts to the stored dictionary of irq info"""
443        self.steps.append(irq_dict)
444        self.step_names.append(name.center(12))
445        self.irqs.update(irq_dict.keys())
446        logging.info('%s:\n%s', name, pprint.pformat(irq_dict))
447
448
449    def reset_irq_counts(self):
450        """Reset the test IRQ counts"""
451        self.steps = []
452        self.step_names = []
453        self.irqs = set()
454
455
456    def run_transition(self, state):
457        """Enter the given power state and reenter s0
458
459        Enter the power state and return to S0. Wait long enough to ensure cr50
460        will enter sleep mode, so we can verify that as well.
461
462        Args:
463            state: the power state: S0ix, S3, or G3
464        """
465        self.reset_irq_counts()
466
467        # Enter the given state
468        self.enter_state(state)
469        self.stage_irq_add(self.get_irq_counts(), 'entered %s' % state)
470
471        logging.info('waiting %d seconds', self.SLEEP_TIME)
472        time.sleep(self.SLEEP_TIME)
473        # Nothing is really happening. Cr50 should basically be idle during
474        # SLEEP_TIME.
475        self.stage_irq_add(self.get_irq_counts(), 'idle in %s' % state)
476
477        # Return to S0
478        self.enter_state('S0')
479        self.stage_irq_add(self.get_irq_counts(), 'entered S0')
480
481        logging.info('waiting %d seconds', self.SLEEP_TIME)
482        time.sleep(self.SLEEP_TIME)
483
484        self.stage_irq_add(self.get_irq_counts(), 'idle in S0')
485
486
487    def verify_state(self, state):
488        """Verify cr50 behavior while running through the power state"""
489
490        try:
491            self.run_transition(state)
492        finally:
493            # reset the system to S0 no matter what happens
494            self.trigger_s0()
495            # Reenable EC chan output.
496            time.sleep(2)
497            logging.info('Setting EC chan %x', self.CHAN_ALL)
498            self.ec.send_command('chan 0x%x' % self.CHAN_ALL)
499
500        # Check that the progress of the irq counts seems reasonable
501        self.check_for_errors(state)
502
503
504    def is_arm_family(self):
505        """Returns True if the DUT is an ARM device."""
506        arch = self.host.run('arch').stdout.strip()
507        return arch in ['aarch64', 'armv7l']
508
509
510    def run_through_power_states(self):
511        """Go through S0ix, S3, and G3. Verify there are no interrupt storms"""
512        self._try_to_bring_dut_up()
513        self.run_errors = {}
514        self.ccd_str = 'ccd ' + ('enabled' if self.ccd_enabled else 'disabled')
515        logging.info('Running through states with %s', self.ccd_str)
516
517        self.cr50.get_ccdstate()
518        if not self.cr50.get_sleepmask() and self.ccd_enabled:
519            logging.info('Sleepmask is not keeping cr50 up with ccd enabled')
520            self.all_errors[self.ccd_str] = 'usb is not active with ccd enabled'
521            return
522
523        # Login before entering S0ix so cr50 will be able to enter regular sleep
524        client_at = autotest.Autotest(self.host)
525        client_at.run_test('login_LoginSuccess')
526
527        self.mount_power_config()
528        try:
529            if self.s0ix_supported:
530                self.verify_state('S0ix')
531
532            if self.s3_supported:
533                self.verify_state('S3')
534        finally:
535            self.umount_power_config()
536
537        # Enter G3
538        self.verify_state('G3')
539        if self.run_errors:
540            self.all_errors[self.ccd_str] = self.run_errors
541
542
543    def run_once(self, host):
544        """Go through S0ix, S3, and G3. Verify there are no interrupt storms"""
545        self.all_errors = {}
546        self.host = host
547        self.is_arm = self.is_arm_family()
548        supports_dts_control = self.cr50.servo_dts_mode_is_valid()
549
550        if supports_dts_control:
551            self.cr50.ccd_disable(raise_error=True)
552
553        self.ccd_enabled = self.cr50.ccd_is_enabled()
554        # Check if the device supports S0ix.
555        self.s0ix_supported = not self.host.run(
556                'check_powerd_config --suspend_to_idle',
557                ignore_status=True).exit_status
558        # Check if the device supports S3.
559        self.s3_supported = not self.host.run(
560                'grep -q deep /sys/power/mem_sleep',
561                ignore_status=True).exit_status
562
563        self.run_through_power_states()
564
565        if supports_dts_control:
566            ccd_was_enabled = self.ccd_enabled
567            self.cr50.ccd_enable(raise_error=supports_dts_control)
568            self.ccd_enabled = self.cr50.ccd_is_enabled()
569            # If the first run had ccd disabled, and the test was able to enable
570            # ccd, run through the states again to make sure there are no issues
571            # come up when ccd is enabled.
572            if not ccd_was_enabled and self.ccd_enabled:
573                self.run_through_power_states()
574        else:
575            logging.info('Current setup only supports test with ccd %sabled.',
576                    'en' if self.ccd_enabled else 'dis')
577
578        self.trigger_s0()
579        if self.all_errors:
580            raise error.TestFail('Unexpected Device State: %s' %
581                    self.all_errors)
582        if not supports_dts_control:
583            raise error.TestNAError('Verified device state with %s. Please '
584                    'run with type c servo v4 to test full device state.' %
585                    self.ccd_str)
586