# xref: /aosp_15_r20/build/make/tools/edit_monitor/daemon_manager_test.py (revision 9e94795a3d4ef5c1d47486f9a02bb378756cea8a)
# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for DaemonManager."""
16
17import fcntl
18import logging
19import multiprocessing
20import os
21import pathlib
22import signal
23import subprocess
24import sys
25import tempfile
26import time
27import unittest
28from unittest import mock
29from edit_monitor import daemon_manager
30from proto import edit_event_pb2
31
32
# Binary path handed to DaemonManager by most tests in this module.
TEST_BINARY_FILE = '/path/to/test_binary'
# File name the tests expect DaemonManager to use (under the 'edit_monitor'
# directory) as the pid file for TEST_BINARY_FILE.
# NOTE(review): presumably a digest of the binary path -- confirm against
# daemon_manager.
TEST_PID_FILE_PATH = (
    '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e'
    '.lock'
)
37
38
def simple_daemon(output_file):
  """Daemon target that writes a fixed marker string to `output_file`.

  Args:
    output_file: Path of the file to create or overwrite.
  """
  marker = 'running daemon target'
  with open(output_file, mode='w') as out:
    out.write(marker)
42
43
def long_running_daemon():
  """Daemon target that never returns; it sleeps in one-second steps."""
  nap_seconds = 1
  while True:
    time.sleep(nap_seconds)
47
48
def memory_consume_daemon_target(size_mb):
  """Daemon target that allocates `size_mb` MiB and holds it for 10 seconds.

  If the allocation raises MemoryError, a message is printed and the function
  returns without sleeping.

  Args:
    size_mb: Amount of memory to allocate, in units of 1024 * 1024 bytes.
  """
  try:
    # The buffer stays bound to a local so the memory remains allocated for
    # the whole sleep.
    held_buffer = bytearray(size_mb * 1024 * 1024)
    time.sleep(10)
  except MemoryError:
    print(f'Process failed to allocate {size_mb} MB of memory.')
56
57
def cpu_consume_daemon_target(target_usage_percent):
  """Daemon target that loops forever, spinning for part of every cycle.

  Each cycle busy-waits for `target_usage_percent / 100` seconds and then
  sleeps for `1 - target_usage_percent / 100` seconds.

  Args:
    target_usage_percent: Portion of each cycle spent busy-waiting, expressed
      as a percentage.
  """
  busy_seconds = target_usage_percent / 100
  idle_seconds = 1 - target_usage_percent / 100
  while True:
    spin_started = time.time()
    # Busy loop to consume CPU.
    while time.time() - spin_started < busy_seconds:
      pass

    # Sleep to reduce CPU usage.
    time.sleep(idle_seconds)
66
67
class DaemonManagerTest(unittest.TestCase):
  """Exercises daemon_manager.DaemonManager using real child processes.

  setUp redirects tempfile.tempdir to a per-test temporary directory and sets
  ENABLE_ANDROID_EDIT_MONITOR=true in os.environ. tearDown kills any child
  process still parented to the test process and restores both settings.
  """

  @classmethod
  def setUpClass(cls):
    super().setUpClass()
    # Configure to print logging to stdout.
    logging.basicConfig(filename=None, level=logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    logging.getLogger('').addHandler(console)

  def setUp(self):
    super().setUp()
    self.original_tempdir = tempfile.tempdir
    self.working_dir = tempfile.TemporaryDirectory()
    # Sets the tempdir under the working dir so any temp files created during
    # tests will be cleaned.
    tempfile.tempdir = self.working_dir.name
    self.patch = mock.patch.dict(
        os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'}
    )
    self.patch.start()

  def tearDown(self):
    try:
      # Cleans up any child processes left by the tests.
      self._cleanup_child_processes()
      self.working_dir.cleanup()
    finally:
      # Restore global state even when the cleanup above fails, so a broken
      # test cannot leak the redirected tempdir or patched environment into
      # the tests that follow.
      tempfile.tempdir = self.original_tempdir
      self.patch.stop()
      super().tearDown()

  def test_start_success_with_no_existing_instance(self):
    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_running(self):
    # Create a running daemon subprocess
    self._create_fake_deamon_process()

    self.assert_run_simple_daemon_success()
    self.assert_no_subprocess_running()

  def test_start_success_with_existing_instance_already_dead(self):
    # Create a pidfile with pid that does not exist.
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_from_different_binary(self):
    # First start an instance based on "some_binary_path"
    existing_dm = daemon_manager.DaemonManager(
        'some_binary_path',
        daemon_target=long_running_daemon,
    )
    existing_dm.start()

    self.assert_run_simple_daemon_success()
    existing_dm.stop()

  def test_start_return_directly_if_block_sign_exists(self):
    # Creates the block sign.
    pathlib.Path(self.working_dir.name).joinpath(
        daemon_manager.BLOCK_SIGN_FILE
    ).touch()

    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch.dict(
      os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True
  )
  def test_start_return_directly_if_disabled(self):
    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_return_directly_if_in_cog_env(self):
    dm = daemon_manager.DaemonManager(
        '/google/cog/cloud/user/workspace/edit_monitor'
    )
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_failed_other_instance_is_starting(self):
    setup_lock_path = pathlib.Path(self.working_dir.name).joinpath(
        TEST_PID_FILE_PATH + '.setup'
    )
    # The context manager and try/finally guarantee the lock is released and
    # the file closed even if start() raises unexpectedly.
    with open(setup_lock_path, 'w') as f:
      # Acquire an exclusive lock
      fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
      try:
        dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
        dm.start()
      finally:
        # Release the lock
        fcntl.flock(f, fcntl.LOCK_UN)

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch('os.kill')
  def test_start_failed_to_kill_existing_instance(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(OSError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_write_pidfile(self):
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)

    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(PermissionError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_start_daemon_process(self):
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(TypeError):
      dm = daemon_manager.DaemonManager(
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          # NOTE(review): `(1)` is the int 1, not a one-element tuple. It is
          # presumably part of what provokes the expected TypeError, so it is
          # left unchanged -- confirm before "fixing" it to `(1,)`.
          daemon_args=(1),
          cclient=fake_cclient,
      )
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_monitor_reboot_with_high_memory_usage(self, mock_execv):
    fake_cclient = FakeClearcutClient()
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
        cclient=fake_cclient,
    )
    # set the fake total_memory_size
    dm.total_memory_size = 100 * 1024 * 1024
    dm.start()
    dm.monitor_daemon(interval=1)

    self.assertGreaterEqual(dm.max_memory_usage, 0.02)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_MEMORY_USAGE,
    )
    mock_execv.assert_called_once()

  def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    self.assertGreaterEqual(dm.max_cpu_usage, 20)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_CPU_USAGE,
    )

  @mock.patch('subprocess.check_output')
  def test_monitor_daemon_failed_does_not_matter(self, mock_output):
    mock_output.side_effect = OSError('Unknown OSError')
    self.assert_run_simple_daemon_success()

  @mock.patch('os.execv')
  def test_monitor_daemon_reboot_triggered(self, mock_execv):
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
    )
    dm.start()
    dm.monitor_daemon(reboot_timeout=0.5)
    mock_execv.assert_called_once()

  def test_stop_success(self):
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()
    dm.stop()

    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

  @mock.patch('os.kill')
  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # These checks live after the `with` block on purpose: anything placed
    # after the raising call inside the block would never execute.
    self.assertTrue(dm.daemon_process.is_alive())
    self.assertTrue(dm.pid_file_path.exists())
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.remove')
  def test_stop_failed_to_remove_pidfile(self, mock_remove):
    mock_remove.side_effect = OSError('Unknown OSError')

    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # Checked outside the `with` block so the assertions actually run.
    self.assert_no_subprocess_running()
    self.assertTrue(dm.pid_file_path.exists())

    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_reboot_success(self, mock_execv):
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path, daemon_target=long_running_daemon
    )
    dm.start()
    dm.reboot()

    # Verifies the old process is stopped
    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

    mock_execv.assert_called_once()

  @mock.patch('os.execv')
  def test_reboot_binary_no_longer_exists(self, mock_execv):
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()

    # Bind the context manager so the exit code can be inspected, and assert
    # after the block so the checks are reachable.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    mock_execv.assert_not_called()
    self.assertEqual(cm.exception.code, 0)

  @mock.patch('os.execv')
  def test_reboot_failed(self, mock_execv):
    mock_execv.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.start()

    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    self.assertEqual(cm.exception.code, 1)
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
    )

  @mock.patch('subprocess.check_output')
  def test_cleanup_success(self, mock_check_output):
    p = self._create_fake_deamon_process()
    fake_cclient = FakeClearcutClient()
    mock_check_output.return_value = f'user {p.pid} 1 1 1 1 1 edit_monitor arg'

    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.cleanup()

    self.assertFalse(p.is_alive())
    self.assertTrue(
        pathlib.Path(self.working_dir.name)
        .joinpath(daemon_manager.BLOCK_SIGN_FILE)
        .exists()
    )

  def assert_run_simple_daemon_success(self):
    """Starts a DaemonManager running simple_daemon and verifies its effects.

    Asserts that the expected pid file exists under the working directory and
    that the daemon target wrote its marker string to the output file.
    """
    daemon_output_path = self._create_temp_file()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=simple_daemon,
        daemon_args=(daemon_output_path,),
    )
    dm.start()
    dm.monitor_daemon(interval=1)

    # Verifies the expected pid file is created.
    expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor', TEST_PID_FILE_PATH
    )
    self.assertTrue(expected_pid_file_path.exists())

    # Verify the daemon process is executed successfully.
    with open(daemon_output_path, 'r') as f:
      contents = f.read()
    self.assertEqual(contents, 'running daemon target')

  def assert_no_subprocess_running(self):
    """Asserts that no child of the current process is alive (non-zombie)."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      self.assertFalse(
          self._is_process_alive(child_pid), f'process {child_pid} still alive'
      )

  def _get_child_processes(self, parent_pid: int) -> list[int]:
    """Returns the pids that `ps` reports with `parent_pid` as their parent.

    Fails the test if the `ps` invocation exits with a non-zero status.
    """
    try:
      output = subprocess.check_output(
          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
      )
    except subprocess.CalledProcessError as e:
      self.fail(f'failed to get child process, error: {e}')

    child_processes = []
    for line in output.splitlines():
      pid, ppid = line.split()
      if int(ppid) == parent_pid:
        child_processes.append(int(pid))
    return child_processes

  def _is_process_alive(self, pid: int) -> bool:
    """Returns True if `ps` finds `pid` in any state other than zombie."""
    try:
      output = subprocess.check_output(
          ['ps', '-p', str(pid), '-o', 'state='], text=True
      ).strip()
    except subprocess.CalledProcessError:
      # `ps` exited non-zero; treated as "process not found".
      return False
    state = output.split()[0]
    return state != 'Z'  # Check if the state is not 'Z' (zombie)

  def _cleanup_child_processes(self):
    """Sends SIGKILL to every child process of the current process."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      try:
        os.kill(child_pid, signal.SIGKILL)
      except ProcessLookupError:
        # process already terminated
        pass

  def _create_temp_file(self) -> str:
    """Creates an empty file in the working dir and returns its path.

    The handle is closed before returning so tests do not leak open file
    objects; the file itself is kept (delete=False) until the working
    directory is cleaned up.
    """
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as temp_file:
      return temp_file.name

  def _create_fake_deamon_process(
      self, name: str = TEST_PID_FILE_PATH
  ) -> multiprocessing.Process:
    """Starts a long-running subprocess and records its pid in a pid file.

    Args:
      name: File name of the pid file created under the 'edit_monitor'
        directory of the working dir.

    Returns:
      The started multiprocessing.Process.
    """
    # Create a long running subprocess
    p = multiprocessing.Process(target=long_running_daemon)
    p.start()

    # Create the pidfile with the subprocess pid
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(name), 'w') as f:
      f.write(str(p.pid))
    return p

  def _assert_error_event_logged(self, fake_cclient, error_type):
    """Asserts exactly one event was logged and it carries `error_type`."""
    error_events = fake_cclient.get_sent_events()
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(len(error_events), 1)
    self.assertEqual(
        edit_event_pb2.EditEvent.FromString(
            error_events[0].source_extension
        ).edit_monitor_error_event.error_type,
        error_type,
    )
504
505
class FakeClearcutClient:
  """In-memory test double that records logged events instead of sending them.

  Events passed to log() are held as pending until flush_events() moves them
  to the sent list. get_sent_events() reports both lists together.
  """

  def __init__(self):
    # Events moved over by flush_events().
    self.sent_log_event = []
    # Events recorded by log() that have not been flushed yet.
    self.pending_log_events = []

  def log(self, log_event):
    """Queues `log_event` as pending."""
    self.pending_log_events += [log_event]

  def flush_events(self):
    """Moves every pending event to the sent list, emptying the queue."""
    self.sent_log_event += self.pending_log_events
    del self.pending_log_events[:]

  def get_sent_events(self):
    """Returns a new list: sent events followed by still-pending ones."""
    return [*self.sent_log_event, *self.pending_log_events]
521
522
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()
525