# Copyright 2024, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unittests for DaemonManager."""

import fcntl
import logging
import multiprocessing
import os
import pathlib
import signal
import subprocess
import sys
import tempfile
import time
import unittest
from unittest import mock
from edit_monitor import daemon_manager
from proto import edit_event_pb2


TEST_BINARY_FILE = '/path/to/test_binary'
TEST_PID_FILE_PATH = (
    '587239c2d1050afdf54512e2d799f3b929f86b43575eb3c7b4bab105dd9bd25e.lock'
)


def simple_daemon(output_file):
  """Daemon target that writes a fixed marker string to `output_file`."""
  with open(output_file, 'w') as f:
    f.write('running daemon target')


def long_running_daemon():
  """Daemon target that never returns; it sleeps in one-second steps."""
  while True:
    time.sleep(1)


def memory_consume_daemon_target(size_mb):
  """Daemon target that allocates `size_mb` MB and holds it for 10 seconds.

  Args:
    size_mb: Number of mebibytes to allocate.
  """
  try:
    size_bytes = size_mb * 1024 * 1024
    # The buffer is never read; the name only keeps the allocation alive
    # while the process sleeps.
    dummy_data = bytearray(size_bytes)
    time.sleep(10)
  except MemoryError:
    print(f'Process failed to allocate {size_mb} MB of memory.')


def cpu_consume_daemon_target(target_usage_percent):
  """Daemon target that alternates busy-looping and sleeping forever.

  Each one-second cycle spins for `target_usage_percent / 100` seconds and
  sleeps for the remainder.

  Args:
    target_usage_percent: Share of each cycle spent busy-looping, in percent.
  """
  busy_seconds = target_usage_percent / 100
  while True:
    start_time = time.time()
    while time.time() - start_time < busy_seconds:
      pass  # Busy loop to consume CPU

    # Sleep to reduce CPU usage
    time.sleep(1 - busy_seconds)


class DaemonManagerTest(unittest.TestCase):
  """Tests for daemon_manager.DaemonManager start/monitor/stop/reboot."""

  @classmethod
  def setUpClass(cls):
    super().setUpClass()
    # Configure to print logging to stdout.
    logging.basicConfig(filename=None, level=logging.DEBUG)
    console = logging.StreamHandler(sys.stdout)
    logging.getLogger('').addHandler(console)

  def setUp(self):
    super().setUp()
    self.original_tempdir = tempfile.tempdir
    self.working_dir = tempfile.TemporaryDirectory()
    # Sets the tempdir under the working dir so any temp files created during
    # tests will be cleaned.
    tempfile.tempdir = self.working_dir.name
    self.patch = mock.patch.dict(
        os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'true'}
    )
    self.patch.start()

  def tearDown(self):
    try:
      # Cleans up any child processes left by the tests.
      self._cleanup_child_processes()
    finally:
      # Always restore global state, even if the child cleanup failed, so one
      # failing test cannot leak its tempdir or environment into the next.
      self.working_dir.cleanup()
      # Restores tempdir.
      tempfile.tempdir = self.original_tempdir
      self.patch.stop()
      super().tearDown()

  def test_start_success_with_no_existing_instance(self):
    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_running(self):
    # Create a running daemon subprocess
    self._create_fake_deamon_process()

    self.assert_run_simple_daemon_success()
    self.assert_no_subprocess_running()

  def test_start_success_with_existing_instance_already_dead(self):
    # Create a pidfile with pid that does not exist.
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    self.assert_run_simple_daemon_success()

  def test_start_success_with_existing_instance_from_different_binary(self):
    # First start an instance based on "some_binary_path"
    existing_dm = daemon_manager.DaemonManager(
        'some_binary_path',
        daemon_target=long_running_daemon,
    )
    existing_dm.start()

    self.assert_run_simple_daemon_success()
    existing_dm.stop()

  def test_start_return_directly_if_block_sign_exists(self):
    # Creates the block sign.
    pathlib.Path(self.working_dir.name).joinpath(
        daemon_manager.BLOCK_SIGN_FILE
    ).touch()

    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch.dict(
      os.environ, {'ENABLE_ANDROID_EDIT_MONITOR': 'false'}, clear=True
  )
  def test_start_return_directly_if_disabled(self):
    dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_return_directly_if_in_cog_env(self):
    dm = daemon_manager.DaemonManager(
        '/google/cog/cloud/user/workspace/edit_monitor'
    )
    dm.start()

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  def test_start_failed_other_instance_is_starting(self):
    setup_lock_path = pathlib.Path(self.working_dir.name).joinpath(
        TEST_PID_FILE_PATH + '.setup'
    )
    with open(setup_lock_path, 'w') as f:
      # Acquire an exclusive lock
      fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
      try:
        dm = daemon_manager.DaemonManager(TEST_BINARY_FILE)
        dm.start()
      finally:
        # Release the lock even if start() raised, so the lock never outlives
        # the test.
        fcntl.flock(f, fcntl.LOCK_UN)

    # Verify no daemon process is started.
    self.assertIsNone(dm.daemon_process)

  @mock.patch('os.kill')
  def test_start_failed_to_kill_existing_instance(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(TEST_PID_FILE_PATH), 'w') as f:
      f.write('123456')

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(OSError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    # Checked after the `with` block: statements following the raising call
    # inside the block would never execute.
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_write_pidfile(self):
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)

    # Makes the directory read-only so write pidfile will fail.
    os.chmod(pid_file_path_dir, 0o555)

    fake_cclient = FakeClearcutClient()
    with self.assertRaises(PermissionError):
      dm = daemon_manager.DaemonManager(TEST_BINARY_FILE, cclient=fake_cclient)
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  def test_start_failed_to_start_daemon_process(self):
    fake_cclient = FakeClearcutClient()
    with self.assertRaises(TypeError):
      dm = daemon_manager.DaemonManager(
          TEST_BINARY_FILE,
          daemon_target='wrong_target',
          # NOTE(review): `(1)` is the int 1, not a tuple. Presumably this is
          # part of the deliberately invalid configuration -- confirm before
          # changing it to `(1,)`.
          daemon_args=(1),
          cclient=fake_cclient,
      )
      dm.start()
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_START_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_monitor_reboot_with_high_memory_usage(self, mock_execv):
    fake_cclient = FakeClearcutClient()
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=memory_consume_daemon_target,
        daemon_args=(2,),
        cclient=fake_cclient,
    )
    # set the fake total_memory_size
    dm.total_memory_size = 100 * 1024 * 1024
    dm.start()
    dm.monitor_daemon(interval=1)

    self.assertGreaterEqual(dm.max_memory_usage, 0.02)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_MEMORY_USAGE,
    )
    mock_execv.assert_called_once()

  def test_monitor_daemon_subprocess_killed_high_cpu_usage(self):
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=cpu_consume_daemon_target,
        daemon_args=(20,),
        cclient=fake_cclient,
    )
    dm.start()
    dm.monitor_daemon(interval=1, cpu_threshold=20)

    self.assertGreaterEqual(dm.max_cpu_usage, 20)
    self.assert_no_subprocess_running()
    self._assert_error_event_logged(
        fake_cclient,
        edit_event_pb2.EditEvent.KILLED_DUE_TO_EXCEEDED_CPU_USAGE,
    )

  @mock.patch('subprocess.check_output')
  def test_monitor_daemon_failed_does_not_matter(self, mock_output):
    mock_output.side_effect = OSError('Unknown OSError')
    self.assert_run_simple_daemon_success()

  @mock.patch('os.execv')
  def test_monitor_daemon_reboot_triggered(self, mock_execv):
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
    )
    dm.start()
    dm.monitor_daemon(reboot_timeout=0.5)
    mock_execv.assert_called_once()

  def test_stop_success(self):
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()
    dm.stop()

    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

  @mock.patch('os.kill')
  def test_stop_failed_to_kill_daemon_process(self, mock_kill):
    mock_kill.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # These checks run after the `with` block so they are actually executed
    # once stop() has raised SystemExit.
    self.assertTrue(dm.daemon_process.is_alive())
    self.assertTrue(dm.pid_file_path.exists())
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.remove')
  def test_stop_failed_to_remove_pidfile(self, mock_remove):
    mock_remove.side_effect = OSError('Unknown OSError')

    fake_cclient = FakeClearcutClient()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )

    with self.assertRaises(SystemExit):
      dm.start()
      dm.stop()
    # These checks run after the `with` block so they are actually executed
    # once stop() has raised SystemExit.
    self.assert_no_subprocess_running()
    self.assertTrue(dm.pid_file_path.exists())

    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_STOP_EDIT_MONITOR
    )

  @mock.patch('os.execv')
  def test_reboot_success(self, mock_execv):
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path, daemon_target=long_running_daemon
    )
    dm.start()
    dm.reboot()

    # Verifies the old process is stopped
    self.assert_no_subprocess_running()
    self.assertFalse(dm.pid_file_path.exists())

    mock_execv.assert_called_once()

  @mock.patch('os.execv')
  def test_reboot_binary_no_longer_exists(self, mock_execv):
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE, daemon_target=long_running_daemon
    )
    dm.start()

    # Bind the context manager so the raised SystemExit can be inspected;
    # `cm.exception` is only available once the `with` block has exited.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    mock_execv.assert_not_called()
    self.assertEqual(cm.exception.code, 0)

  @mock.patch('os.execv')
  def test_reboot_failed(self, mock_execv):
    mock_execv.side_effect = OSError('Unknown OSError')
    fake_cclient = FakeClearcutClient()
    binary_path = self._create_temp_file()

    dm = daemon_manager.DaemonManager(
        binary_path,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.start()

    # Bind the context manager so the raised SystemExit can be inspected;
    # `cm.exception` is only available once the `with` block has exited.
    with self.assertRaises(SystemExit) as cm:
      dm.reboot()
    self.assertEqual(cm.exception.code, 1)
    self._assert_error_event_logged(
        fake_cclient, edit_event_pb2.EditEvent.FAILED_TO_REBOOT_EDIT_MONITOR
    )

  @mock.patch('subprocess.check_output')
  def test_cleanup_success(self, mock_check_output):
    p = self._create_fake_deamon_process()
    fake_cclient = FakeClearcutClient()
    mock_check_output.return_value = f'user {p.pid} 1 1 1 1 1 edit_monitor arg'

    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=long_running_daemon,
        cclient=fake_cclient,
    )
    dm.cleanup()

    self.assertFalse(p.is_alive())
    self.assertTrue(
        pathlib.Path(self.working_dir.name)
        .joinpath(daemon_manager.BLOCK_SIGN_FILE)
        .exists()
    )

  def assert_run_simple_daemon_success(self):
    """Starts a `simple_daemon` instance and checks its pidfile and output."""
    daemon_output_path = self._create_temp_file()
    dm = daemon_manager.DaemonManager(
        TEST_BINARY_FILE,
        daemon_target=simple_daemon,
        daemon_args=(daemon_output_path,),
    )
    dm.start()
    dm.monitor_daemon(interval=1)

    # Verifies the expected pid file is created.
    expected_pid_file_path = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor', TEST_PID_FILE_PATH
    )
    self.assertTrue(expected_pid_file_path.exists())

    # Verify the daemon process is executed successfully.
    with open(daemon_output_path, 'r') as f:
      contents = f.read()
      self.assertEqual(contents, 'running daemon target')

  def assert_no_subprocess_running(self):
    """Fails if any direct child of this process is alive (non-zombie)."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      self.assertFalse(
          self._is_process_alive(child_pid), f'process {child_pid} still alive'
      )

  def _create_temp_file(self) -> str:
    """Creates an empty file under the working dir and returns its path.

    The file handle is closed before returning; `delete=False` keeps the file
    on disk until the working dir is cleaned up.
    """
    with tempfile.NamedTemporaryFile(
        dir=self.working_dir.name, delete=False
    ) as f:
      return f.name

  def _get_child_processes(self, parent_pid: int) -> list[int]:
    """Returns pids listed by `ps` whose parent pid equals `parent_pid`.

    Fails the test if the `ps` invocation exits with an error.
    """
    try:
      output = subprocess.check_output(
          ['ps', '-o', 'pid,ppid', '--no-headers'], text=True
      )

      child_processes = []
      for line in output.splitlines():
        pid, ppid = line.split()
        if int(ppid) == parent_pid:
          child_processes.append(int(pid))
      return child_processes
    except subprocess.CalledProcessError as e:
      self.fail(f'failed to get child process, error: {e}')

  def _is_process_alive(self, pid: int) -> bool:
    """Returns True if `ps` reports `pid` in any state other than zombie."""
    try:
      output = subprocess.check_output(
          ['ps', '-p', str(pid), '-o', 'state='], text=True
      ).strip()
    except subprocess.CalledProcessError:
      return False
    fields = output.split()
    if not fields:
      # No state reported at all; treat the process as gone instead of
      # raising IndexError.
      return False
    return fields[0] != 'Z'  # Check if the state is not 'Z' (zombie)

  def _cleanup_child_processes(self):
    """Sends SIGKILL to every direct child process of the test process."""
    child_pids = self._get_child_processes(os.getpid())
    for child_pid in child_pids:
      try:
        os.kill(child_pid, signal.SIGKILL)
      except ProcessLookupError:
        # process already terminated
        pass

  def _create_fake_deamon_process(
      self, name: str = TEST_PID_FILE_PATH
  ) -> multiprocessing.Process:
    """Starts a long-running subprocess and writes its pid to a pidfile.

    Args:
      name: File name of the pidfile created under `<working_dir>/edit_monitor`.

    Returns:
      The started multiprocessing.Process.
    """
    # Create a long running subprocess
    p = multiprocessing.Process(target=long_running_daemon)
    p.start()

    # Create the pidfile with the subprocess pid
    pid_file_path_dir = pathlib.Path(self.working_dir.name).joinpath(
        'edit_monitor'
    )
    pid_file_path_dir.mkdir(parents=True, exist_ok=True)
    with open(pid_file_path_dir.joinpath(name), 'w') as f:
      f.write(str(p.pid))
    return p

  def _assert_error_event_logged(self, fake_cclient, error_type):
    """Asserts exactly one event was logged and it carries `error_type`."""
    error_events = fake_cclient.get_sent_events()
    # assertEqual, not the deprecated assertEquals alias, which was removed in
    # Python 3.12.
    self.assertEqual(len(error_events), 1)
    self.assertEqual(
        edit_event_pb2.EditEvent.FromString(
            error_events[0].source_extension
        ).edit_monitor_error_event.error_type,
        error_type,
    )


class FakeClearcutClient:
  """In-memory stand-in for a clearcut client that records logged events."""

  def __init__(self):
    # Events passed to log() that have not been flushed yet.
    self.pending_log_events = []
    # Events moved over by flush_events().
    self.sent_log_event = []

  def log(self, log_event):
    """Queues `log_event` as pending."""
    self.pending_log_events.append(log_event)

  def flush_events(self):
    """Moves all pending events to the sent list."""
    self.sent_log_event.extend(self.pending_log_events)
    self.pending_log_events.clear()

  def get_sent_events(self):
    """Returns sent events followed by still-pending ones, as a new list."""
    return self.sent_log_event + self.pending_log_events


if __name__ == '__main__':
  unittest.main()