xref: /aosp_15_r20/external/pytorch/torch/_inductor/compile_worker/watchdog.py (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1# mypy: allow-untyped-defs
2import os
3import signal
4from threading import Thread
5from time import sleep
6from typing import Optional
7
8
9# If this process dies abnormally (e.g. segfault)
10# it will not shut down the workers. Instead,
11# the workers will have their parent reassigned to the
12# init process. This launches a separate thread to
13# watch for the worker getting reassigned,
14# and cleans it up in this case.
15#
16# This function cannot be an inner function since otherwise mp_context="spawn" would
17# not work for ProcessPoolExecutor since inner functions cannot be pickled.
def _async_compile_initializer(orig_ppid) -> None:
    """Set up a pool worker: start the orphan watchdog and ignore SIGINT.

    Records ``orig_ppid`` in the module-level ``_original_parent`` and starts
    a daemon thread (stored in ``_watchdog_thread``) that compares
    ``os.getppid()`` against ``orig_ppid`` once per second. As soon as the
    two differ, the thread sends SIGKILL to the current process.

    Args:
        orig_ppid: PID this process's parent is expected to keep.
    """
    global _watchdog_thread, _original_parent

    def _kill_self_when_reparented() -> None:
        # Poll forever; the only way out is killing our own process.
        while True:
            sleep(1)
            if os.getppid() == orig_ppid:
                continue
            os.kill(os.getpid(), signal.SIGKILL)

    _original_parent = orig_ppid
    watcher = Thread(target=_kill_self_when_reparented, daemon=True)
    _watchdog_thread = watcher
    watcher.start()
    # Ignore Ctrl-C (i.e. SIGINT) sent to pool workers to avoid meaningless log spam.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
31
32
# Both values stay None until _async_compile_initializer() assigns them.
_original_parent: Optional[int] = None
_watchdog_thread: Optional[Thread] = None


def has_parent_changed() -> bool:
    """Return True if the current parent PID differs from the recorded one.

    The recorded value is the module-level ``_original_parent``. While it is
    still ``None`` the comparison with ``os.getppid()`` is unequal, so this
    returns True.
    """
    current_parent = os.getppid()
    return current_parent != _original_parent
39