xref: /aosp_15_r20/external/autotest/server/site_tests/servo_LogGrab/servo_LogGrab.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2# Copyright 2020 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Test to validate servo.py log grabbing works."""
7
8import logging
9import os
10import re
11import tempfile
12import shutil
13
14from autotest_lib.client.common_lib import error
15from autotest_lib.server import test
16
17class servo_LogGrab(test.test):
18    """Ensure all logs are grabbed and glued together."""
19    version = 1
20
21    # Regex used to extract log-record timestamp from log-line
22    # An example log record timestamp is:
23    #   '2020-02-05 12:39:28,394 - EC - EC3PO.Interpreter'[..]
24    # This regex does a loose check on the date format as it does not
25    # enforce real month numbers for example.
26    # Note that the millisecond component is not part of the match group.
27    # This is because it can happen that two log messages compete for the
28    # file-print and one that was recorded a few milliseconds prior still
29    # makes it to the file first.
30    TS_RE = r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d{3} .*$'
31
32    TS_EXTRACTOR = re.compile(TS_RE)
33
34    def setup_dir(self):
35        """Setup a test directory to grab servo logs into and out of."""
36        self.logtestdir = tempfile.mkdtemp()
37
38    def grab_and_get_directory(self):
39        """Thin wrapper around servo_host instance_logs function.
40
41        @returns: directory name inside |self.logtestdir| containing servod logs
42                  None if no directory was created (and logs grabbed)
43
44        @raises error.TestFail: if log grabbing creates more or less than one
45                                directory
46        """
47        ts = self.servo_host.get_instance_logs_ts()
48        self.servo_host.get_instance_logs(ts, self.logtestdir)
49        dirs_created = len(os.listdir(self.logtestdir))
50        if dirs_created > 1:
51            raise error.TestFail('Log grabbing created more than one directory')
52        if dirs_created == 0:
53            # None to indicate no logs were grabbed.
54            return None
55        return os.path.join(self.logtestdir, os.listdir(self.logtestdir)[0])
56
57    def check_logs_ordered(self, outdir):
58        """Helper to check that all logs in |outdir| are concat in order.
59
60        Log grabbing glues together all logs for one loglevel into one
61        file. These files ought to be in order.
62
63        @param outdir: directory with logs to inspect
64
65        @raises error.TestFail: if any log in |outdir| contains unsorted log
66                                lines
67        """
68        for logf in os.listdir(outdir):
69            if not logf.startswith(self.servo_host.JOINT_LOG_PREFIX):
70                # MCU files are to be skipped.
71                continue
72            logpath = os.path.join(outdir, logf)
73            timestamps = []
74            with open(logpath, 'r') as log:
75                for line in log:
76                    ts_match = self.TS_EXTRACTOR.match(line)
77                    if ts_match:
78                        # It's safe to skip lines without timestamps, as those
79                        # are just linebreaks in the log record.
80                        timestamps.append(ts_match.group(1))
81            if timestamps != sorted(timestamps):
82                raise error.TestFail('Logfile %s contained unsorted '
83                                     'timestamps.' % logf)
84
85    def check_get_control_in_logs(self, outdir, ctrls):
86        """Helper to check debug logs in |outdir|.
87
88        This method always checks to ensure that the init logline saying that
89        servod is listening on a port is printed.
90
91        Note: if a control has been issued n times, and the goal is to ensure
92        that it shows up n times in the logs, add it n times to |ctrls|.
93
94        @param outdir: directory with logs to inspect
95        @param ctrls: list of get controls that have been issued and should be
96                      checked for
97
98        @returns: False: - if any of the controls in |ctrls| are not found in
99                           the logs
100                         - if the log line saying that servod is listening on a
101                           port is missing
102                  True: otherwise
103        """
104        artifacts = [re.compile(r'- \(get\) %s' % ctrl) for ctrl in ctrls]
105        # We also need to ensure that the initial logs don't get lost.
106        init_artifact = re.compile(r'Listening on (\d{1,3}\.){3}\d{1,3} '
107                                   r'port \d{1,5}')
108        artifacts.append(init_artifact)
109        fname = os.path.join(outdir, 'log.DEBUG.txt')
110        with open(fname, 'r') as f:
111            for line in f:
112                # Only maintain the artifacts that have had no match yet.
113                new_artifacts = []
114                found = False
115                for artifact in artifacts:
116                    if not found and artifact.search(line):
117                        # Each artifact is for one log-line. If a line is
118                        # supposed to appear x times, there should be x
119                        # artifacts, therefore we need to continue to avoid
120                        # undercounting.
121                        found = True
122                        continue
123                    # No match, add back to list.
124                    new_artifacts.append(artifact)
125                if not new_artifacts:
126                    # All found, so the log file checks out.
127                    return True
128                artifacts = new_artifacts
129        logging.error('The following artifacts were not found in logs:')
130        for artifact in artifacts:
131            logging.error(artifact.pattern)
132        return False
133
134
135    def test_singular(self):
136        """Subtest to verify single-file grabbing works.
137
138        As for any new test, the servod instance should only have one file
139        upon initialization. This subtest ensures that the one file is grabbed
140        properly and found in the results directory.
141
142        @raises error.TestFail: if the log line saying that servod is listening
143                                on a port is missing
144        @raises error.TestFail: one of the loglevel summaries is missing
145        """
146        outdir = self.grab_and_get_directory()
147        required = ['log.%s.txt' % l for l in ['INFO', 'DEBUG', 'WARNING']]
148        logs = os.listdir(outdir)
149        for log in required:
150            if log not in logs:
151                raise error.TestFail('Required file %s missing. '
152                                     'Log files found: %s'
153                                     % (log, ', '.join(logs)))
154        self.check_logs_ordered(outdir)
155        self.check_get_control_in_logs(outdir, [])
156
157    def test_dual(self):
158        """Subtest to verify singular file rotation works.
159
160        Test rotates the logs once and then checks integrity.
161
162        @raises error.TestFail: if the log line saying that servod is listening
163                                on a port is missing
164        @raises error.TestFail: logline showing 'config_files' was issued is
165                                missing
166        """
167        logging.info('Running subtest to rotate logs once and validate that '
168                     'logs are glued together in the results directory '
169                     'correctly.')
170        # First, rotate out the logs.
171        self.servo.set_nocheck('rotate_servod_logs', 'yes')
172        # Next issue a control that (a) requires no underlying hardware, and
173        # (b) is not part of hwinit.
174        ctrl = 'config_files'
175        self.servo.get(ctrl)
176        outdir = self.grab_and_get_directory()
177        self.check_logs_ordered(outdir)
178        if not self.check_get_control_in_logs(outdir, [ctrl]):
179            raise error.TestFail('joint logfile log.DEBUG.txt does not contain '
180                                 'log artifact that was created after the first'
181                                 ' rotation')
182
183    def test_compressed(self):
184        """Subtest to verify multi file rotation and compression works.
185
186        Test rotates the logs many times to generate compression. Validate
187        that the output is properly decompressed with all expected lines.
188
189        @raises error.TestFail: if the log line saying that servod is listening
190                                on a port is missing
191        @raises error.TestFail: logline showing 'config_files' was issued is
192                                missing
193        @raises error.TestFail: any of the expected number of loglines showing
194                                'servod_pid' was issued is missing
195        """
196        logging.info('Running subtest to rotate logs past the compression '
197                     'threshold and validate that compressed logs are '
198                     'correctly decompressed upon grabbing.')
199        # First, rotate out the logs.
200        self.servo.set_nocheck('rotate_servod_logs', 'yes')
201        ctrl = 'config_files'
202        # Next issue a control that (a) requires no underlying hardware, and
203        # (b) is not part of hwinit.
204        self.servo.get(ctrl)
205        ctrls = [ctrl]
206        # Then, rotate out the logs and generate traffic for 6 additional times.
207        # That should trigger file compression.
208        # The command here has to be different to ensure that the first file
209        # that had the config_files in it got decompressed.
210        rot_ctrl = 'servod_pid'
211        for _ in range(6):
212            self.servo.set_nocheck('rotate_servod_logs', 'yes')
213            self.servo.get(rot_ctrl)
214            # Append every time to ensure it shows up the exact number of
215            # times in the logs.
216            ctrls.append(rot_ctrl)
217        outdir = self.grab_and_get_directory()
218        self.check_logs_ordered(outdir)
219        if not self.check_get_control_in_logs(outdir, ctrls):
220            raise error.TestFail('joint logfile log.DEBUG.txt does not contain '
221                                 'log artifact that was created after the '
222                                 'multiple rotation that leads to compression.')
223
224    def test_missing(self):
225        """Subtest to verify missing symlinks will grab no logs, throw no error.
226
227        @raises error.TestFail: deleting symlinks to the latest logs causes
228                                logs to still  be grabbed
229        """
230        logging.info('Running subtest to remove latest log symlinks and '
231                     'validate this results in a grab logs noop.')
232        self.servo_host.remove_latest_log_symlinks()
233        outdir = self.grab_and_get_directory()
234        if not outdir == None:
235            raise error.TestFail('Removing symlinks to latest logs still '
236                                 'caused test to find logs.')
237
238    def test_restart(self):
239        """Subtest to verify that a servod restart in the middle is caught.
240
241        @raises error.TestFail: if the logs from the previous instances are
242                                missing on a restart
243        """
244        restarts = 2
245        # Need to cache the initial timestamp so that we can reuse it later
246        initial_ts = self.servo_host.get_instance_logs_ts()
247        for _ in range(restarts):
248            self.servo_host.restart_servod()
249        self.servo_host._initial_instance_ts = initial_ts
250        # Grab the logs and probe for old, orphaned logs
251        ts = self.servo_host.get_instance_logs_ts()
252        self.servo_host.probe_servod_restart(ts, self.logtestdir)
253        self.servo_host.get_instance_logs(ts, self.logtestdir)
254        # First, validate that in total 3 directories were created: one
255        # for the current instance, and two for the old ones
256        dirs_created = len(os.listdir(self.logtestdir))
257        expected_dirs = restarts + 1
258        if dirs_created != expected_dirs:
259            logging.error('Should have created %d dirs from grabbing logs '
260                          'but only created %d.', expected_dirs, dirs_created)
261            raise error.TestFail('Grabbing logs from restarted instance failed')
262        # Check that panicinfo was called on the MCUs
263        # TODO(coconutruben): refactor this into one source of truth on which
264        # servos support panicinfo
265        # There should only ever be one directory here that doesn't have the
266        # suffix but if that logic changes in servo_host, then this needs to be
267        # refactored, or an API call added to servo_host
268        final_instance_dir = [d for d in os.listdir(self.logtestdir) if
269                              self.servo_host.OLD_LOG_SUFFIX not in d][0]
270        final_instance_dir_path = os.path.join(self.logtestdir,
271                                               final_instance_dir)
272        for mcu in ['servo_v4', 'servo_micro']:
273            mcu_path = os.path.join(final_instance_dir_path, '%s.txt' % mcu)
274            if os.path.exists(mcu_path):
275                with open(mcu_path, 'r') as f:
276                    if 'panicinfo' not in f.read():
277                        raise error.TestFail('No panicinfo call found in %r.' %
278                                             mcu_path)
279    def test_restart_no_servo(self):
280        """Subtest to verify that lack of |_servo| attribute yields no failure.
281
282        @raises error.TestFail: if the lack of _servo causes an issue
283        """
284        self.servo_host.servo = None
285        try:
286            self.test_restart()
287        except AttributeError as e:
288            raise error.TestFail('Setting |_servo| to None caused an error. %s'
289                                 % str(e))
290        except error.TestFail as e:
291            if 'No panicinfo call found in' in str(e):
292                # This means that the lack of |_servo| worked as intended e.g.
293                # the logs are grabbed, but the panicinfo could not be issued.
294                pass
295            # for any other issue, we need to actually raise it.
296            raise e
297        # Restore servo object again.
298        self.servo_host.servo = self.servo
299
300    def run_once(self, host):
301        """Try 3 scenarios of log grabbing: single, multiple, compressed."""
302        self.servo_host = host._servo_host
303        self.servo = host.servo
304        # Note that test_missing should run last as it is destructive and
305        # removes logs on the servo_host side. Alternatively, restart
306        # servod if another destructive test is added.
307        for subtest in [self.test_singular, self.test_dual,
308                        self.test_compressed, self.test_restart,
309                        self.test_restart_no_servo, self.test_missing]:
310            self.setup_dir()
311            subtest()
312            logging.info('Success')
313            self.cleanup_logs()
314
315    def cleanup_logs(self):
316        """Delete the temporary directory."""
317        shutil.rmtree(self.logtestdir)
318