# mypy: allow-untyped-defs
import functools
import logging
import os
import sys
import tempfile
from typing import Any, Dict, Optional

import torch
from torch._strobelight.compile_time_profiler import StrobelightCompileTimeProfiler


log = logging.getLogger(__name__)

if os.environ.get("TORCH_COMPILE_STROBELIGHT", False):
    import shutil

    if not shutil.which("strobeclient"):
        log.info(
            "TORCH_COMPILE_STROBELIGHT is set, but it seems like you are not on an FB machine."
        )
    else:
        log.info("Strobelight profiler is enabled via environment variable")
        StrobelightCompileTimeProfiler.enable()

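# Illustrative note (hypothetical script name): the gate above is typically
# opted into from the shell, e.g.
#
#   TORCH_COMPILE_STROBELIGHT=1 python some_training_script.py
#
# Any non-empty value makes the os.environ.get() check above truthy.
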
# This arbitrary-looking assortment of functionality is provided here
# to have a central place for overridable behavior. The motivating
# use is the FB build environment, where this source file is replaced
# by an equivalent.

if torch._running_with_deploy():
    # __file__ is meaningless in the context of frozen torch used in torch deploy.
    # Setting an empty torch_parent should allow the functions below to operate
    # without crashing, but it's unclear if there is a valid use case for them
    # in the context of deploy.
    torch_parent = ""
else:
    if os.path.basename(os.path.dirname(__file__)) == "shared":
        torch_parent = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    else:
        torch_parent = os.path.dirname(os.path.dirname(__file__))


def get_file_path(*path_components: str) -> str:
    return os.path.join(torch_parent, *path_components)


def get_file_path_2(*path_components: str) -> str:
    return os.path.join(*path_components)


def get_writable_path(path: str) -> str:
    if os.access(path, os.W_OK):
        return path
    return tempfile.mkdtemp(suffix=os.path.basename(path))


def prepare_multiprocessing_environment(path: str) -> None:
    pass


def resolve_library_path(path: str) -> str:
    return os.path.realpath(path)


def throw_abstract_impl_not_imported_error(opname, module, context):
    if module in sys.modules:
        raise NotImplementedError(
            f"{opname}: We could not find the fake impl for this operator. "
        )
    else:
        raise NotImplementedError(
            f"{opname}: We could not find the fake impl for this operator. "
            f"The operator specified that you may need to import the '{module}' "
            f"Python module to load the fake impl. {context}"
        )


# NB!  This treats "skip" kwarg specially!!
def compile_time_strobelight_meta(phase_name):
    def compile_time_strobelight_meta_inner(function):
        @functools.wraps(function)
        def wrapper_function(*args, **kwargs):
            if "skip" in kwargs:
                kwargs["skip"] = kwargs["skip"] + 1

            if not StrobelightCompileTimeProfiler.enabled:
                return function(*args, **kwargs)

            return StrobelightCompileTimeProfiler.profile_compile_time(
                function, phase_name, *args, **kwargs
            )

        return wrapper_function

    return compile_time_strobelight_meta_inner

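# Illustrative sketch (hypothetical names, commented out so importing this module
# has no side effects): a compile-time entry point decorated with the helper
# above so Strobelight attributes its samples to a named phase.  When the callee
# accepts a "skip" frame count, the wrapper bumps it by one, presumably to
# account for the extra wrapper frame.
#
#   @compile_time_strobelight_meta(phase_name="entire_frame_compile")
#   def _compile_frame(code, skip=0):
#       ...  # actual compilation work
#
#   _compile_frame(some_code, skip=1)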

# Meta only, see
# https://www.internalfb.com/intern/wiki/ML_Workflow_Observability/User_Guides/Adding_instrumentation_to_your_code/
#
# This will cause an event to get logged to Scuba via the signposts API.  You
# can view samples on the API at https://fburl.com/scuba/workflow_signpost/zh9wmpqs
# We log to subsystem "torch", and the category and name you provide here.
# Each of the arguments translates into a Scuba column.  We're still figuring
# out local conventions in PyTorch, but category should be something like
# "dynamo" or "inductor", and name should be a specific string describing what
# kind of event happened.
#
# Killswitch is at
# https://www.internalfb.com/intern/justknobs/?name=pytorch%2Fsignpost#event
def signpost_event(category: str, name: str, parameters: Dict[str, Any]):
    log.info("%s %s: %r", category, name, parameters)

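# Illustrative sketch (hypothetical category/name/parameters, commented out): a
# caller emitting a signpost; each key in the parameters dict is meant to become
# a Scuba column as described above.
#
#   signpost_event(
#       category="dynamo",
#       name="guard_recompile",
#       parameters={"frame": "forward", "reason": "shape change"},
#   )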

def log_compilation_event(metrics):
    log.info("%s", metrics)


def upload_graph(graph):
    pass


def set_pytorch_distributed_envs_from_justknobs():
    pass


def log_export_usage(**kwargs):
    pass


def log_trace_structured_event(*args, **kwargs) -> None:
    pass


def log_cache_bypass(*args, **kwargs) -> None:
    pass


def log_torchscript_usage(api: str, **kwargs):
    _ = api
    return


def check_if_torch_exportable():
    return False


def log_torch_jit_trace_exportability(
    api: str,
    type_of_export: str,
    export_outcome: str,
    result: str,
):
    _, _, _, _ = api, type_of_export, export_outcome, result
    return


def capture_pre_autograd_graph_using_training_ir() -> bool:
    return False


class JustKnobsConfig:
    """Represents a lazily loaded config value.

    This is designed to be used to specify a value in a config, e.g.
    foo.bar = JustKnobsConfig(name="//foo:bar", env_name="FORCE_FOO_BAR")

    Call .get() in order to access the value, e.g.
    if foo.bar.get():

    Note that the value is fetched once and then not allowed to change. This
    means fewer surprises, with the downside that you may have to restart a job
    to pick up an update.

    It can also be set explicitly via set, e.g.
    foo.bar = JustKnobsConfig(name="//foo:bar")
    foo.bar.set(True)

    Note that this does allow for no JK name (so that you can use this to replace old configurations).
    """

    def __init__(
        self, *, name: Optional[str] = None, env_name=None, default: bool = True
    ):
        self.name = name
        self.env_name = env_name
        self.default = default
        self.value: Optional[bool] = None
        self.executed_value = None

    def set(self, value: bool):
        self.value = value

    def get(self):
        if self.executed_value is None:
            self.executed_value = justknobs_feature(
                self.name,
                config_value=self.value,
                env_name=self.env_name,
                default=self.default,
            )
        return self.executed_value

    def __str__(self):
        v = bool(self)
        return f"JustKnobsConfig(name={self.name}, env_name={self.env_name}, default={self.default} - evals_to={v})"

    def __bool__(self):
        return self.get()

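# Illustrative sketch (hypothetical JK and env names, commented out): wiring a
# config entry to JustKnobsConfig.  The value is resolved lazily on first use
# and then cached, so later JK/env changes are not picked up without a restart.
#
#   enable_foo = JustKnobsConfig(
#       name="pytorch/compiler:enable_foo", env_name="TORCH_ENABLE_FOO", default=True
#   )
#   ...
#   if enable_foo:   # same as enable_foo.get(); can also be forced via enable_foo.set(False)
#       run_new_codepath()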

def justknobs_feature(
    name: Optional[str], config_value=None, env_name=None, default: bool = True
):
    """Returns whether or not a specific justknob feature is enabled.

    This is a slightly higher-level API than justknobs_check, designed to make it "easy" to do the right thing.
    The primary thing it does is allow configuration to override JK by default, while retaining the ability to
    force things the other way during SEVs.

    The preference order (i.e. who wins first) in OSS (and FB) is
    - Config, if specified
    - Environment variable, if specified
    - JK (FB), or default (OSS)

    Quickstart:
    Have a config variable.
    Make a JK which is set to your "enabled" value (generally true).
    Use this feature to check it (if you set the JK to be false, change the default).
    If you have an env variable, also use this function to check it.

    Arguments:
        name - This should correspond 1:1 to a JK name internal to FB.
        env_name - If this is set, we'll try to read the value from environment variables.
        config_value - If this is set to anything other than None, we'll use this value by
            default. Note that within FB, there is some functionality to force override these
            configs.
        default - This is the value to return in OSS. This avoids having to write weird double
            negatives within justknobs and the config code if you just want to have the
            killswitch work by having the feature return True to turn off features.

    Requirements:
        WARNING - Don't use this at import time; simply pass in the existing config.
        If you want to use this at config time, use JustKnobsConfig.
    """
    if config_value is not None:
        return config_value
    if env_name is not None and ((env := os.getenv(env_name)) is not None):
        env = env.upper()
        if env in ("1", "TRUE"):
            return True
        if env in ("0", "FALSE"):
            return False
        log.error(
            "Difficulty parsing env variable %s=%s for feature %s - Assuming env variable means true and returning True",
            env_name,
            env,
            name,
        )
        # We could return default here, but that was confusing to log.
        return True
    if name is None:
        return True
    if not default:
        return not justknobs_check(name)
    return justknobs_check(name)

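# Illustrative sketch of the precedence documented above (hypothetical JK and env
# names, commented out): an explicit config value wins, then the environment
# variable, then the JK lookup (FB) or the default (OSS).
#
#   justknobs_feature("pytorch/foo:bar", config_value=False)       # -> False (config wins)
#   justknobs_feature("pytorch/foo:bar", env_name="FORCE_FOO_BAR") # -> parses FORCE_FOO_BAR if set
#   justknobs_feature("pytorch/foo:bar", default=True)             # -> justknobs_check(...) (True in OSS)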

def justknobs_check(name: str) -> bool:
    """
    This function can be used to killswitch functionality in FB prod,
    where you can toggle this value to False in JK without having to
    do a code push.  In OSS, we always have everything turned on all
    the time, because downstream users can simply choose to not update
    PyTorch.  (If more fine-grained enable/disable is needed, we could
    potentially have a map we look the name up in to toggle behavior.  But
    the point is that it's all tied to source code in OSS, since there's
    no live server to query.)

    This is the bare minimum functionality I needed to do some killswitches.
    We have a more detailed plan at
    https://docs.google.com/document/d/1Ukerh9_42SeGh89J-tGtecpHBPwGlkQ043pddkKb3PU/edit
    In particular, in some circumstances it may be necessary to read in
    a knob once at process start, and then use it consistently for the
    rest of the process.  Future functionality will codify these patterns
    into a better high-level API.

    WARNING: Do NOT call this function at module import time; JK is not
    fork-safe and you will break anyone who forks the process and then
    hits JK again.
    """
    return True

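# Illustrative killswitch pattern (hypothetical JK name and helpers, commented
# out): callers typically guard an optional code path and fall back when the
# knob is turned off in prod.
#
#   if justknobs_check("pytorch/compiler:new_feature"):
#       run_new_path()
#   else:
#       run_old_path()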

def justknobs_getval_int(name: str) -> int:
    """
    Read warning on justknobs_check
    """
    return 0


def is_fb_unit_test() -> bool:
    return False


@functools.lru_cache(None)
def max_clock_rate():
    if not torch.version.hip:
        from triton.testing import nvsmi

        return nvsmi(["clocks.max.sm"])[0]
    else:
        # Manually set max clock speeds on ROCm until equivalent nvsmi
        # functionality is available in triton.testing or via pyamdsmi enablement.
        # Required for the test_snode_runtime unit tests.
        gcn_arch = str(torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0])
        if "gfx94" in gcn_arch:
            return 1700
        elif "gfx90a" in gcn_arch:
            return 1700
        elif "gfx908" in gcn_arch:
            return 1502
        elif "gfx11" in gcn_arch:
            return 1700
        elif "gfx103" in gcn_arch:
            return 1967
        elif "gfx101" in gcn_arch:
            return 1144
        else:
            return 1100


TEST_MASTER_ADDR = "127.0.0.1"
TEST_MASTER_PORT = 29500
# USE_GLOBAL_DEPS controls whether __init__.py tries to load
# libtorch_global_deps, see Note [Global dependencies]
USE_GLOBAL_DEPS = True
# USE_RTLD_GLOBAL_WITH_LIBTORCH controls whether __init__.py tries to load
# _C.so with RTLD_GLOBAL during the call to dlopen.
USE_RTLD_GLOBAL_WITH_LIBTORCH = False
# If an op was defined in C++ and extended from Python using
# torch.library.register_fake, this controls whether we require that there be a
# m.set_python_module("mylib.ops") call from C++ that associates
# the C++ op with a Python module.
REQUIRES_SET_PYTHON_MODULE = False


def maybe_upload_prof_stats_to_manifold(profile_path: str) -> Optional[str]:
    print("Uploading profile stats (FB-only; otherwise a no-op)")
    return None


def log_chromium_event_internal(event, stack, logger_uuid, start_timestamp=None):
    return None
357