xref: /aosp_15_r20/external/pytorch/torch/_inductor/cpp_builder.py (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
# This C++ builder is designed to support both Windows and Linux.
# For the design document, see this RFC: https://github.com/pytorch/pytorch/issues/124245
3
4import copy
5import errno
6import functools
7import json
8import logging
9import os
10import platform
11import re
12import shlex
13import shutil
14import subprocess
15import sys
16import sysconfig
17import warnings
18from ctypes import cdll
19from pathlib import Path
20from typing import Any, List, Optional, Sequence, Tuple, Union
21
22import torch
23from torch._dynamo.utils import dynamo_timed
24from torch._inductor import config, exc
25from torch._inductor.cpu_vec_isa import invalid_vec_isa, VecISA
26from torch._inductor.runtime.runtime_utils import cache_dir
27from torch.torch_version import TorchVersion
28
29
if config.is_fbcode():
    # fbcode builds: import the build-path helpers and the real global-cache
    # logging utilities.
    from triton.fb import build_paths  # noqa: F401

    from torch._inductor.fb.utils import (
        log_global_cache_errors,
        log_global_cache_stats,
        log_global_cache_vals,
        use_global_cache,
    )
else:
    # Outside fbcode, define stand-ins with the same names so the rest of the
    # module can reference them unconditionally.

    def log_global_cache_errors(*args: Any, **kwargs: Any) -> None:
        # Stub: accepts any arguments and does nothing.
        pass

    def log_global_cache_stats(*args: Any, **kwargs: Any) -> None:
        # Stub: accepts any arguments and does nothing.
        pass

    def log_global_cache_vals(*args: Any, **kwargs: Any) -> None:
        # Stub: accepts any arguments and does nothing.
        pass

    def use_global_cache() -> bool:
        # Stub: always reports False.
        return False
52
53
# On Windows, a temporary directory is needed to store the .obj files.
_BUILD_TEMP_DIR = "CxxBuild"

# Platform flags used when setting up compilation.
_IS_LINUX = sys.platform.startswith("linux")
_IS_MACOS = sys.platform.startswith("darwin")
_IS_WINDOWS = sys.platform == "win32"

# Positional arguments for bytes.decode() on subprocess output: an explicit
# "utf-8" on Windows, decode()'s own defaults everywhere else.
SUBPROCESS_DECODE_ARGS = ("utf-8",) if _IS_WINDOWS else ()

# Module-level logger.
log = logging.getLogger(__name__)
65
66
67# =============================== toolchain ===============================
@functools.lru_cache(1)
def cpp_compiler_search(search: str) -> str:
    """
    Return the first usable C++ compiler among the candidates in `search`.

    Each candidate is probed by running `<candidate> --version`; the first
    candidate for which that succeeds is returned. A `None` candidate means
    "install g++ via conda": it is only honored on Linux and only when the
    `TORCH_INDUCTOR_INSTALL_GXX` environment variable is set.

    NOTE(review): `search` is annotated as `str`, but it is iterated
    element-wise, its elements are compared against `None`, and
    `get_cpp_compiler` passes a tuple of candidates.

    Raises:
        exc.InvalidCxxCompiler: if no candidate could be executed.
    """
    # Imported lazily inside the function rather than at module import time.
    from torch._inductor.codecache import get_lock_dir, LOCK_TIMEOUT

    for cxx in search:
        try:
            if cxx is None:
                # gxx package is only available for Linux
                # according to https://anaconda.org/conda-forge/gxx/
                if sys.platform != "linux":
                    continue
                # Do not install GXX by default
                if not os.getenv("TORCH_INDUCTOR_INSTALL_GXX"):
                    continue
                from filelock import FileLock

                # The conda installation runs while holding a file lock in the
                # shared lock directory.
                lock_dir = get_lock_dir()
                lock = FileLock(
                    os.path.join(lock_dir, "g++.lock"), timeout=LOCK_TIMEOUT
                )
                with lock:
                    cxx = install_gcc_via_conda()
            # Probe the candidate; a failure moves on to the next one.
            subprocess.check_output([cxx, "--version"])
            return cxx
        except (subprocess.SubprocessError, FileNotFoundError, ImportError):
            continue
    raise exc.InvalidCxxCompiler
95
96
def install_gcc_via_conda() -> str:
    """On older systems, this is a quick way to get a modern compiler.

    Creates a conda environment containing `gxx` under `<cache_dir>/gcc`
    unless `<cache_dir>/gcc/bin/g++` already exists.

    Returns:
        The path `<cache_dir>/gcc/bin/g++`. The path is returned even when no
        conda executable could be located, in which case it may not exist.

    Raises:
        subprocess.CalledProcessError: if the `conda create` command fails.
    """
    prefix = os.path.join(cache_dir(), "gcc")
    cxx_path = os.path.join(prefix, "bin", "g++")
    if not os.path.exists(cxx_path):
        # Prefer the executable named by CONDA_EXE; fall back to a PATH lookup
        # when the variable is unset or empty. (With a non-None default,
        # os.environ.get() never returns None, so the fallback must be
        # expressed with `or`.)
        conda = os.environ.get("CONDA_EXE") or shutil.which("conda")
        if conda is None:
            log.warning("conda executable not found; cannot download GCC")
        else:
            log.info("Downloading GCC via conda")
            subprocess.check_call(
                [
                    conda,
                    "create",
                    f"--prefix={prefix}",
                    "--channel=conda-forge",
                    "--quiet",
                    "-y",
                    "python=3.8",
                    "gxx",
                ],
                # check_call() never reads from a pipe, so PIPE could block
                # the child once the pipe buffer fills; discard output instead.
                stdout=subprocess.DEVNULL,
            )
    return cxx_path
121
122
@functools.lru_cache(None)
def check_compiler_exist_windows(compiler: str) -> None:
    """
    Check that the compiler can be launched, in case the end user has not
    activated the MSVC environment.

    Args:
        compiler: Name or path of the compiler executable to probe.

    Raises:
        RuntimeError: if the compiler executable cannot be found.
    """
    try:
        # Only the ability to launch the compiler matters, so the output is
        # neither kept nor decoded (decoding localized output could fail).
        subprocess.check_output([compiler, "/help"], stderr=subprocess.STDOUT)
    except FileNotFoundError as e:
        raise RuntimeError(f"Compiler: {compiler} is not found.") from e
    except subprocess.SubprocessError:
        # Expected: some compilers (clang, clang++) exist but do not support
        # the `/help` argument.
        pass
139
140
def get_cpp_compiler() -> str:
    """
    Return the C++ compiler to use.

    On Windows the `CXX` environment variable is used (default `cl`) after
    checking that it can be launched. In fbcode the compiler comes from
    `build_paths`. Otherwise the candidates configured in `config.cpp.cxx`
    are searched for the first usable compiler.
    """
    if _IS_WINDOWS:
        windows_cxx = os.environ.get("CXX", "cl")
        check_compiler_exist_windows(windows_cxx)
        return windows_cxx

    if config.is_fbcode():
        if torch.version.hip is None:
            return build_paths.cc()
        return build_paths.clang()

    # The search helper expects a tuple of candidates.
    configured = config.cpp.cxx
    if isinstance(configured, (list, tuple)):
        candidates = tuple(configured)
    else:
        candidates = (configured,)
    return cpp_compiler_search(candidates)
156
157
158@functools.lru_cache(None)
159def _is_apple_clang(cpp_compiler: str) -> bool:
160    version_string = subprocess.check_output([cpp_compiler, "--version"]).decode("utf8")
161    return "Apple" in version_string.splitlines()[0]
162
163
def _is_clang(cpp_compiler: str) -> bool:
    """
    Return True if `cpp_compiler` is treated as clang on the current platform.

    Raises:
        RuntimeError: on Windows, when the compiler name ends with `clang` or
            `clang++` (only clang-cl is supported there).
    """
    # On macOS, Apple clang may be named gcc, so the compiler info is checked.
    if sys.platform == "darwin":
        return _is_apple_clang(cpp_compiler)

    if not _IS_WINDOWS:
        return re.search(r"(clang|clang\+\+)", cpp_compiler) is not None

    # The clang suite has many compilers, and only clang-cl is supported.
    if re.search(r"((clang$)|(clang\+\+$))", cpp_compiler):
        raise RuntimeError(
            "Please use clang-cl, due to torch.compile only support MSVC-like CLI (compiler flags syntax)."
        )
    return re.search(r"(clang-cl)", cpp_compiler) is not None
176
177
178def _is_gcc(cpp_compiler: str) -> bool:
179    if sys.platform == "darwin" and _is_apple_clang(cpp_compiler):
180        return False
181    return bool(re.search(r"(gcc|g\+\+)", cpp_compiler))
182
183
@functools.lru_cache(None)
def _is_msvc_cl(cpp_compiler: str) -> bool:
    """
    Return True if `cpp_compiler` is Microsoft's `cl`.

    Always False off Windows. On Windows the compiler is run with `/help`
    and the first line of its output is checked for "Microsoft". A compiler
    that cannot be found, that rejects `/help`, or that prints nothing is
    reported as not being MSVC.
    """
    if not _IS_WINDOWS:
        return False

    try:
        output_msg = (
            subprocess.check_output([cpp_compiler, "/help"], stderr=subprocess.STDOUT)
            .strip()
            .decode(*SUBPROCESS_DECODE_ARGS)
        )
    except (FileNotFoundError, subprocess.SubprocessError):
        # Missing executable, or a compiler that does not accept `/help`.
        return False

    banner_lines = output_msg.splitlines()
    # Guard against empty output before looking at the first line.
    return bool(banner_lines) and "Microsoft" in banner_lines[0]
200
201
@functools.lru_cache(None)
def _is_intel_compiler(cpp_compiler: str) -> bool:
    """
    Return True if the first line of `<cpp_compiler> --version` mentions "Intel".

    A compiler that cannot be found, or that fails on `--version`, is
    reported as not being an Intel compiler.

    Raises:
        RuntimeError: on Windows, when an Intel compiler is named `icx` or
            `icx-cc`, or when the detected version is below the minimum.
    """

    def _check_minimal_version(compiler_version: TorchVersion) -> None:
        """
        On Windows: early version icx has `-print-file-name` issue, and can't preload correctly for inductor.
        """
        min_version = "2024.2.1" if _IS_WINDOWS else "0.0.0"
        if compiler_version < TorchVersion(min_version):
            raise RuntimeError(
                f"Intel Compiler error: less than minimal version {min_version}."
            )

    try:
        raw_version = subprocess.check_output(
            [cpp_compiler, "--version"], stderr=subprocess.DEVNULL
        )
    except FileNotFoundError:
        return False
    except subprocess.SubprocessError:
        # The `--version` argument is not supported.
        return False

    version_text = raw_version.strip().decode(*SUBPROCESS_DECODE_ARGS)
    if "Intel" not in version_text.splitlines()[0]:
        return False

    if _IS_WINDOWS and re.search(r"((icx$)|(icx-cc$))", cpp_compiler):
        raise RuntimeError(
            "Please use icx-cl, due to torch.compile only support MSVC-like CLI (compiler flags syntax)."
        )

    # Version check
    version_match = re.search(r"(\d+[.]\d+[.]\d+[.]\d+)", version_text)
    if version_match is not None:
        _check_minimal_version(TorchVersion(version_match.group(1)))

    return True
244
245
@functools.lru_cache(None)
def is_gcc() -> bool:
    """Return whether the selected C++ compiler is GCC (result is cached)."""
    selected_compiler = get_cpp_compiler()
    return _is_gcc(selected_compiler)
249
250
@functools.lru_cache(None)
def is_clang() -> bool:
    """Return whether the selected C++ compiler is clang (result is cached)."""
    selected_compiler = get_cpp_compiler()
    return _is_clang(selected_compiler)
254
255
@functools.lru_cache(None)
def is_intel_compiler() -> bool:
    """Return whether the selected C++ compiler is an Intel compiler (result is cached)."""
    selected_compiler = get_cpp_compiler()
    return _is_intel_compiler(selected_compiler)
259
260
@functools.lru_cache(None)
def is_apple_clang() -> bool:
    """Return whether the selected C++ compiler is Apple clang (result is cached)."""
    selected_compiler = get_cpp_compiler()
    return _is_apple_clang(selected_compiler)
264
265
@functools.lru_cache(None)
def is_msvc_cl() -> bool:
    """Return whether the selected C++ compiler is MSVC `cl` (result is cached)."""
    selected_compiler = get_cpp_compiler()
    return _is_msvc_cl(selected_compiler)
269
270
def get_compiler_version_info(compiler: str) -> str:
    """
    Return the compiler's version banner flattened to a single line.

    `-v` is tried first and `--version` second; if both attempts fail an
    empty string is returned. Carriage returns and newlines in the output
    are replaced with underscores.
    """
    env = os.environ.copy()
    env["LC_ALL"] = "C"  # Don't localize output

    for version_flag in ("-v", "--version"):
        try:
            raw_output = subprocess.check_output(
                [compiler, version_flag], stderr=subprocess.STDOUT, env=env
            )
            version_string = raw_output.decode(*SUBPROCESS_DECODE_ARGS)
        except Exception:
            continue
        break
    else:
        # Neither flag produced usable output.
        return ""

    # Multiple lines to one line string.
    return version_string.replace("\r", "_").replace("\n", "_")
289
290
291# =============================== cpp builder ===============================
292def _append_list(dest_list: List[str], src_list: List[str]) -> None:
293    for item in src_list:
294        dest_list.append(copy.deepcopy(item))
295
296
297def _remove_duplication_in_list(orig_list: List[str]) -> List[str]:
298    new_list: List[str] = []
299    for item in orig_list:
300        if item not in new_list:
301            new_list.append(item)
302    return new_list
303
304
305def _create_if_dir_not_exist(path_dir: str) -> None:
306    if not os.path.exists(path_dir):
307        try:
308            Path(path_dir).mkdir(parents=True, exist_ok=True)
309        except OSError as exc:  # Guard against race condition
310            if exc.errno != errno.EEXIST:
311                raise RuntimeError(  # noqa: TRY200 (Use `raise from`)
312                    f"Fail to create path {path_dir}"
313                )
314
315
316def _remove_dir(path_dir: str) -> None:
317    if os.path.exists(path_dir):
318        for root, dirs, files in os.walk(path_dir, topdown=False):
319            for name in files:
320                file_path = os.path.join(root, name)
321                os.remove(file_path)
322            for name in dirs:
323                dir_path = os.path.join(root, name)
324                os.rmdir(dir_path)
325        os.rmdir(path_dir)
326
327
328def _run_compile_cmd(cmd_line: str, cwd: str) -> bytes:
329    cmd = shlex.split(cmd_line)
330    try:
331        status = subprocess.check_output(args=cmd, cwd=cwd, stderr=subprocess.STDOUT)
332    except subprocess.CalledProcessError as e:
333        output = e.output.decode("utf-8")
334        openmp_problem = "'omp.h' file not found" in output or "libomp" in output
335        if openmp_problem and sys.platform == "darwin":
336            instruction = (
337                "\n\nOpenMP support not found. Please try one of the following solutions:\n"
338                "(1) Set the `CXX` environment variable to a compiler other than Apple clang++/g++ "
339                "that has builtin OpenMP support;\n"
340                "(2) install OpenMP via conda: `conda install llvm-openmp`;\n"
341                "(3) install libomp via brew: `brew install libomp`;\n"
342                "(4) manually setup OpenMP and set the `OMP_PREFIX` environment variable to point to a path"
343                " with `include/omp.h` under it."
344            )
345            output += instruction
346        raise exc.CppCompileError(cmd, output) from e
347    return status
348
349
def run_compile_cmd(cmd_line: str, cwd: str) -> bytes:
    """Run the compile command under the "compile_file" dynamo timer and return its output."""
    with dynamo_timed("compile_file"):
        compile_output = _run_compile_cmd(cmd_line, cwd)
    return compile_output
353
354
def normalize_path_separator(orig_path: str) -> str:
    """On Windows, replace the OS path separator with "/"; elsewhere return the path unchanged."""
    return orig_path.replace(os.sep, "/") if _IS_WINDOWS else orig_path
359
360
class BuildOptionsBase:
    """
    This is the base class that stores cxx build options, as a template.
    Actually, to build a cxx shared library we just need to select a compiler
    and maintain the suitable args.
    """

    def __init__(
        self,
        compiler: str = "",
        definitions: Optional[List[str]] = None,
        include_dirs: Optional[List[str]] = None,
        cflags: Optional[List[str]] = None,
        ldflags: Optional[List[str]] = None,
        libraries_dirs: Optional[List[str]] = None,
        libraries: Optional[List[str]] = None,
        passthrough_args: Optional[List[str]] = None,
        aot_mode: bool = False,
        use_absolute_path: bool = False,
        compile_only: bool = False,
    ) -> None:
        """
        Store the given build options.

        Every list argument defaults to an empty list. The lists are copied
        so that later in-place updates of the stored options never modify
        the caller's lists.
        """
        self._compiler = compiler
        self._definations: List[str] = list(definitions or [])
        self._include_dirs: List[str] = list(include_dirs or [])
        self._cflags: List[str] = list(cflags or [])
        self._ldflags: List[str] = list(ldflags or [])
        self._libraries_dirs: List[str] = list(libraries_dirs or [])
        self._libraries: List[str] = list(libraries or [])
        # Some args are hard to abstract in an OS-compatible way; pass them through directly.
        self._passthough_args: List[str] = list(passthrough_args or [])

        self._aot_mode: bool = aot_mode
        self._use_absolute_path: bool = use_absolute_path
        self._compile_only: bool = compile_only

    def _process_compile_only_options(self) -> None:
        """Drop library dirs and libraries when only compiling."""
        if self._compile_only:
            self._libraries_dirs = []
            self._libraries = []

    @staticmethod
    def _dedup(options: List[str]) -> List[str]:
        """Return `options` without later duplicates, keeping first-seen order."""
        return list(dict.fromkeys(options))

    def _remove_duplicate_options(self) -> None:
        """Remove duplicate entries from every stored option list."""
        self._definations = self._dedup(self._definations)
        self._include_dirs = self._dedup(self._include_dirs)
        self._cflags = self._dedup(self._cflags)
        self._ldflags = self._dedup(self._ldflags)
        self._libraries_dirs = self._dedup(self._libraries_dirs)
        self._libraries = self._dedup(self._libraries)
        self._passthough_args = self._dedup(self._passthough_args)

    def _finalize_options(self) -> None:
        """Apply the compile-only adjustment, then de-duplicate all options."""
        # Both helpers must actually be called; a bare attribute reference
        # (without parentheses) does nothing.
        self._process_compile_only_options()
        self._remove_duplicate_options()

    def get_compiler(self) -> str:
        return self._compiler

    def get_definations(self) -> List[str]:
        return self._definations

    def get_include_dirs(self) -> List[str]:
        return self._include_dirs

    def get_cflags(self) -> List[str]:
        return self._cflags

    def get_ldflags(self) -> List[str]:
        return self._ldflags

    def get_libraries_dirs(self) -> List[str]:
        return self._libraries_dirs

    def get_libraries(self) -> List[str]:
        return self._libraries

    def get_passthough_args(self) -> List[str]:
        return self._passthough_args

    def get_aot_mode(self) -> bool:
        return self._aot_mode

    def get_use_absolute_path(self) -> bool:
        return self._use_absolute_path

    def get_compile_only(self) -> bool:
        return self._compile_only

    def save_flags_to_file(self, file: str) -> None:
        """Write all stored options to `file` as a JSON object."""
        attrs = {
            "compiler": self.get_compiler(),
            "definitions": self.get_definations(),
            "include_dirs": self.get_include_dirs(),
            "cflags": self.get_cflags(),
            "ldflags": self.get_ldflags(),
            "libraries_dirs": self.get_libraries_dirs(),
            "libraries": self.get_libraries(),
            "passthrough_args": self.get_passthough_args(),
            "aot_mode": self.get_aot_mode(),
            "use_absolute_path": self.get_use_absolute_path(),
            "compile_only": self.get_compile_only(),
        }

        with open(file, "w") as f:
            json.dump(attrs, f)
464
465
def _get_warning_all_cflag(warning_all: bool = True) -> List[str]:
    """Return `["Wall"]` on non-Windows platforms when `warning_all` is set, else `[]`."""
    if _IS_WINDOWS or not warning_all:
        return []
    return ["Wall"]
471
472
def _get_cpp_std_cflag(std_num: str = "c++17") -> List[str]:
    """
    Return the C++ standard flag (without its leading dash or slash).

    On Windows the standard is always c++20, regardless of `std_num`.
    """
    if not _IS_WINDOWS:
        return [f"std={std_num}"]

    # On Windows, only c++20 can support `std::enable_if_t`.
    # Ref: https://learn.microsoft.com/en-us/cpp/overview/cpp-conformance-improvements-2019?view=msvc-170#checking-for-abstract-class-types # noqa: B950
    # Note:
    #     Only setup c++20 for Windows inductor. I tried to upgrade all project to c++20, but it is failed:
    #     https://github.com/pytorch/pytorch/pull/131504
    std_num = "c++20"
    return [f"std:{std_num}"]
486
487
def _get_os_related_cpp_cflags(cpp_compiler: str) -> List[str]:
    """Return the OS-specific cflags; clang gets one extra flag on non-Windows platforms."""
    if _IS_WINDOWS:
        return [
            "wd4819",
            "wd4251",
            "wd4244",
            "wd4267",
            "wd4275",
            "wd4018",
            "wd4190",
            "wd4624",
            "wd4067",
            "wd4068",
            "EHsc",
        ]

    posix_flags = ["Wno-unused-variable", "Wno-unknown-pragmas"]
    if _is_clang(cpp_compiler):
        posix_flags.append("Werror=ignored-optimization-argument")
    return posix_flags
508
509
def _get_optimization_cflags() -> List[str]:
    """Return the optimization cflags for the current platform and inductor config."""
    if _IS_WINDOWS:
        return ["O2"]

    if config.aot_inductor.debug_compile:
        opt_flags = ["O0", "g"]
    else:
        opt_flags = ["O3", "DNDEBUG"]
    opt_flags += ["ffast-math", "fno-finite-math-only"]

    if not config.cpp.enable_unsafe_math_opt_flag:
        opt_flags.append("fno-unsafe-math-optimizations")
    if not config.cpp.enable_floating_point_contract_flag:
        opt_flags.append("ffp-contract=off")

    # https://stackoverflow.com/questions/65966969/why-does-march-native-not-work-on-apple-m1
    # `-march=native` is unrecognized option on M1
    if sys.platform != "darwin" and not config.is_fbcode():
        native_flag = (
            "mcpu=native" if platform.machine() == "ppc64le" else "march=native"
        )
        opt_flags.append(native_flag)

    return opt_flags
533
534
def _get_shared_cflag(compile_only: bool) -> List[str]:
    """Return the cflags used for building shared (position-independent) code."""
    if _IS_WINDOWS:
        # MSVC `/MD` using python `ucrtbase.dll` lib as runtime.
        # https://learn.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-170
        return ["DLL", "MD"]

    if compile_only:
        return ["fPIC"]

    shared_flags = ["shared", "fPIC"]
    if platform.system() == "Darwin" and "clang" in get_cpp_compiler():
        # This causes undefined symbols to behave the same as linux
        shared_flags.append("undefined dynamic_lookup")
    return shared_flags
552
553
def get_cpp_options(
    cpp_compiler: str,
    compile_only: bool,
    warning_all: bool = True,
    extra_flags: Sequence[str] = (),
) -> Tuple[List[str], List[str], List[str], List[str], List[str], List[str], List[str]]:
    """
    Assemble the basic C++ build options.

    Returns:
        A 7-tuple `(definitions, include_dirs, cflags, ldflags,
        libraries_dirs, libraries, passthrough_args)`. Only `cflags` and
        `passthrough_args` are populated here; the passthrough list holds a
        single entry, `extra_flags` joined with spaces.
    """
    cflags: List[str] = [
        *_get_shared_cflag(compile_only),
        *_get_optimization_cflags(),
        *_get_warning_all_cflag(warning_all),
        *_get_cpp_std_cflag(),
        *_get_os_related_cpp_cflags(cpp_compiler),
    ]
    passthough_args: List[str] = [" ".join(extra_flags)]

    definations: List[str] = []
    include_dirs: List[str] = []
    ldflags: List[str] = []
    libraries_dirs: List[str] = []
    libraries: List[str] = []

    return (
        definations,
        include_dirs,
        cflags,
        ldflags,
        libraries_dirs,
        libraries,
        passthough_args,
    )
587
588
class CppOptions(BuildOptionsBase):
    """
    Cxx build options, inherited from BuildOptionsBase.
    These options contain the basic cxx build options:
    1. OS related args.
    2. Toolchains related args.
    3. Cxx standard related args.
    Note:
    1. These options are good for building assist modules, such as x86_isa_help.
    """

    def __init__(
        self,
        compile_only: bool = False,
        warning_all: bool = True,
        extra_flags: Sequence[str] = (),
        use_absolute_path: bool = False,
    ) -> None:
        super().__init__()
        self._compiler = get_cpp_compiler()
        self._use_absolute_path = use_absolute_path
        self._compile_only = compile_only

        # get_cpp_options returns its option lists in this same order.
        option_groups = get_cpp_options(
            cpp_compiler=self._compiler,
            compile_only=compile_only,
            extra_flags=extra_flags,
            warning_all=warning_all,
        )
        destinations = (
            self._definations,
            self._include_dirs,
            self._cflags,
            self._ldflags,
            self._libraries_dirs,
            self._libraries,
            self._passthough_args,
        )
        for dest_options, new_options in zip(destinations, option_groups):
            _append_list(dest_options, new_options)
        self._finalize_options()
635
636
def _get_glibcxx_abi_build_flags() -> List[str]:
    """Return the `_GLIBCXX_USE_CXX11_ABI` define matching torch's own setting (empty on Windows)."""
    if _IS_WINDOWS:
        return []
    abi_value = int(torch._C._GLIBCXX_USE_CXX11_ABI)
    return ["-D_GLIBCXX_USE_CXX11_ABI=" + str(abi_value)]
642
643
644def _get_torch_cpp_wrapper_defination() -> List[str]:
645    return ["TORCH_INDUCTOR_CPP_WRAPPER"]
646
647
648def _use_custom_generated_macros() -> List[str]:
649    return [" C10_USING_CUSTOM_GENERATED_MACROS"]
650
651
def _use_fb_internal_macros() -> List[str]:
    """Return the fbcode-only macro definitions; empty on Windows and outside fbcode."""
    if _IS_WINDOWS or not config.is_fbcode():
        return []

    # TODO: this is to avoid FC breakage for fbcode. When using newly
    # generated model.so on an older verion of PyTorch, need to use
    # the v1 version for aoti_torch_create_tensor_from_blob
    create_tensor_from_blob_v1 = "AOTI_USE_CREATE_TENSOR_FROM_BLOB_V1"

    return [
        "C10_USE_GLOG",
        "C10_USE_MINIMAL_GLOG",
        "C10_DISABLE_TENSORIMPL_EXTENSIBILITY",
        create_tensor_from_blob_v1,
    ]
671
672
def _setup_standard_sys_libs(
    cpp_compiler: str,
    aot_mode: bool,
    use_absolute_path: bool,
) -> Tuple[List[str], List[str], List[str]]:
    """
    Collect the flags needed to build against the fbcode system libraries.

    Returns:
        A tuple `(cflags, include_dirs, passthrough_args)`. All three lists
        are empty on Windows and outside fbcode.
    """
    from torch._inductor.codecache import _LINKER_SCRIPT

    cflags: List[str] = []
    include_dirs: List[str] = []
    passthough_args: List[str] = []
    if _IS_WINDOWS:
        # Nothing to set up on Windows.
        return cflags, include_dirs, passthough_args

    if config.is_fbcode():
        cflags.append("nostdinc")
        # Note that the order of include paths do matter, as a result
        # we need to have several branches interleaved here
        if torch.version.hip is None:
            include_dirs.append(build_paths.sleef())
        include_dirs.append(build_paths.openmp())
        include_dirs.append(build_paths.python())
        if torch.version.hip is not None:
            include_dirs.append(build_paths.clang_include())
            include_dirs.append(build_paths.gcc_include())
            include_dirs.append(build_paths.gcc_install_tools_include())
        else:
            include_dirs.append(build_paths.cc_include())
            include_dirs.append(build_paths.libgcc())
            include_dirs.append(build_paths.libgcc_arch())
        include_dirs.append(build_paths.libgcc_backward())
        include_dirs.append(build_paths.glibc())
        include_dirs.append(build_paths.linux_kernel())
        include_dirs.append("include")

        # The full linker script path is used only for AOT mode without
        # absolute paths; every other combination uses just the file name.
        if aot_mode and not use_absolute_path:
            linker_script = _LINKER_SCRIPT
        else:
            linker_script = os.path.basename(_LINKER_SCRIPT)

        # The extra linker-related arguments are only added for clang.
        if _is_clang(cpp_compiler):
            passthough_args.append(" --rtlib=compiler-rt")
            passthough_args.append(" -fuse-ld=lld")
            passthough_args.append(f" -Wl,--script={linker_script}")
            passthough_args.append(" -B" + build_paths.glibc_lib())
            passthough_args.append(" -L" + build_paths.glibc_lib())

    return cflags, include_dirs, passthough_args
720
721
def _get_build_args_of_chosen_isa(vec_isa: VecISA) -> Tuple[List[str], List[str]]:
    """
    Return `(macros, build_flags)` for the chosen vector ISA.

    Both lists are empty when `vec_isa` is the invalid ISA. In fbcode the
    macros are replaced by CPU-capability macros derived from `str(vec_isa)`.
    """
    macros = []
    build_flags = []
    if vec_isa != invalid_vec_isa:
        # Add Windows support later.
        macros = [copy.deepcopy(macro) for macro in vec_isa.build_macro()]
        build_flags = [vec_isa.build_arch_flags()]

        if config.is_fbcode():
            cap = str(vec_isa).upper()
            macros = [
                f"CPU_CAPABILITY={cap}",
                f"CPU_CAPABILITY_{cap}",
                f"HAVE_{cap}_CPU_DEFINITION",
            ]

    return macros, build_flags
741
742
def _get_torch_related_args(
    include_pytorch: bool, aot_mode: bool
) -> Tuple[List[str], List[str], List[str]]:
    """Return `(include_dirs, libraries_dirs, libraries)` needed to build against torch."""
    from torch.utils.cpp_extension import _TORCH_PATH, TORCH_LIB_PATH

    include_dirs = [
        os.path.join(_TORCH_PATH, "include"),
        os.path.join(_TORCH_PATH, "include", "torch", "csrc", "api", "include"),
        # Some internal (old) Torch headers don't properly prefix their includes,
        # so we need to pass -Itorch/lib/include/TH as well.
        os.path.join(_TORCH_PATH, "include", "TH"),
        os.path.join(_TORCH_PATH, "include", "THC"),
    ]
    libraries_dirs = [TORCH_LIB_PATH]

    libraries: List[str] = []
    if sys.platform != "darwin" and not config.is_fbcode():
        libraries.extend(["torch", "torch_cpu"])
        if not aot_mode:
            libraries.append("torch_python")

    if _IS_WINDOWS:
        libraries.append("sleef")

    # Unconditionally import c10 for non-abi-compatible mode to use TORCH_CHECK - See PyTorch #108690
    if not config.abi_compatible:
        libraries.append("c10")
        libraries_dirs.append(TORCH_LIB_PATH)

    return include_dirs, libraries_dirs, libraries
772
773
774def _get_python_include_dirs() -> List[str]:
775    include_dir = Path(sysconfig.get_path("include"))
776    # On Darwin Python executable from a framework can return
777    # non-existing /Library/Python/... include path, in which case
778    # one should use Headers folder from the framework
779    if not include_dir.exists() and platform.system() == "Darwin":
780        std_lib = Path(sysconfig.get_path("stdlib"))
781        include_dir = (std_lib.parent.parent / "Headers").absolute()
782    if not (include_dir / "Python.h").exists():
783        warnings.warn(f"Can't find Python.h in {str(include_dir)}")
784    return [str(include_dir)]
785
786
def _get_python_related_args() -> Tuple[List[str], List[str]]:
    """Return `(python_include_dirs, python_lib_dirs)` for building against Python."""
    include_dirs = _get_python_include_dirs()

    scheme_name = "nt" if _IS_WINDOWS else "posix_prefix"
    scheme_include = sysconfig.get_path("include", scheme=scheme_name)
    if scheme_include is not None:
        include_dirs.append(scheme_include)

    if _IS_WINDOWS:
        interpreter_dir = os.path.dirname(sys.executable)
        lib_dirs = [os.path.join(interpreter_dir, "libs")]
    else:
        lib_dirs = [sysconfig.get_config_var("LIBDIR")]

    if config.is_fbcode():
        include_dirs.append(build_paths.python())

    return include_dirs, lib_dirs
805
806
@functools.lru_cache(None)
def is_conda_llvm_openmp_installed() -> bool:
    """
    Return True if `conda list llvm-openmp --json` reports at least one package.

    Returns False when conda cannot be launched, when the command fails, or
    when its output cannot be decoded or parsed as JSON.
    """
    try:
        command = "conda list llvm-openmp --json"
        output = subprocess.check_output(command.split()).decode("utf8")
        return len(json.loads(output)) > 0
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError covers a missing `conda` executable (FileNotFoundError);
        # ValueError covers both JSON and Unicode decoding failures.
        return False
815
816
@functools.lru_cache(None)
def homebrew_libomp() -> Tuple[bool, str]:
    """
    Locate Homebrew's `libomp`.

    Returns:
        `(omp_available, libomp_path)`. `libomp_path` is the prefix reported
        by `brew --prefix libomp` and `omp_available` tells whether that path
        exists. `(False, "")` is returned when `brew` is not installed or the
        query fails.
    """
    # check if `brew` is installed (no dependency on an external `which` binary)
    brew = shutil.which("brew")
    if brew is None:
        return False, ""

    try:
        # get the location of `libomp` if it is installed
        # this is the location that `libomp` **would** be installed
        # see https://github.com/Homebrew/brew/issues/10261#issuecomment-756563567 for details
        libomp_path = (
            subprocess.check_output([brew, "--prefix", "libomp"])
            .decode("utf8")
            .strip()
        )
    except (OSError, subprocess.SubprocessError):
        return False, ""

    # check if `libomp` is installed
    omp_available = os.path.exists(libomp_path)
    return omp_available, libomp_path
835
836
@functools.lru_cache(None)
def perload_clang_libomp_win(cpp_compiler: str, omp_name: str) -> None:
    """
    Preload the OpenMP library `omp_name` from the compiler's `bin` directory.

    The directory is obtained from `<cpp_compiler> -print-file-name=bin`.
    Nothing happens when that command fails or the library file is missing.
    """
    try:
        raw_output = subprocess.check_output([cpp_compiler, "-print-file-name=bin"])
    except subprocess.SubprocessError:
        return

    bin_dir = raw_output.decode("utf8").rstrip()
    omp_path = os.path.join(bin_dir, omp_name)
    if not os.path.isfile(omp_path):
        return

    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
    cdll.LoadLibrary(omp_path)
849
850
@functools.lru_cache(None)
def perload_icx_libomp_win(cpp_compiler: str) -> None:
    """
    Preload the Intel compiler's built-in runtime libraries.

    The Intel compiler implements more math libraries than clang, for
    performance purposes, and they need to be preloaded like the OpenMP
    library.
    """

    def _load_icx_built_in_lib_by_name(cpp_compiler: str, lib_name: str) -> bool:
        # Ask the compiler where the library lives; report False on failure.
        try:
            raw_output = subprocess.check_output(
                [cpp_compiler, f"-print-file-name={lib_name}"],
                stderr=subprocess.DEVNULL,
            )
        except subprocess.SubprocessError:
            return False

        omp_path = raw_output.decode(*SUBPROCESS_DECODE_ARGS).rstrip()
        if not os.path.isfile(omp_path):
            return False

        os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
        cdll.LoadLibrary(omp_path)
        return True

    preload_list = (
        "libiomp5md.dll",  # openmp
        "svml_dispmd.dll",  # svml library
        "libmmd.dll",  # libm
    )
    for lib_name in preload_list:
        _load_icx_built_in_lib_by_name(cpp_compiler, lib_name)
880
881
def _get_openmp_args(
    cpp_compiler: str,
) -> Tuple[List[str], List[str], List[str], List[str], List[str], List[str]]:
    """
    Collect the OpenMP related build arguments for `cpp_compiler`.

    Returns a 6-tuple of string lists, in this order: cflags, ldflags,
    include directories, library directories, libraries, passthrough args.
    Flags are stored without a leading "-" or "/". This function never adds
    anything to the ldflags list.
    """
    compile_flags: List[str] = []
    link_flags: List[str] = []
    inc_dirs: List[str] = []
    link_dirs: List[str] = []
    link_libs: List[str] = []
    extra_args: List[str] = []

    if _IS_MACOS:
        # Per https://mac.r-project.org/openmp/ right way to pass `openmp` flags to MacOS is via `-Xclang`
        compile_flags += ["Xclang", "fopenmp"]

        # only Apple builtin compilers (Apple Clang++) require openmp
        have_omp = not _is_apple_clang(cpp_compiler)

        # check the `OMP_PREFIX` environment first
        prefix_from_env = os.getenv("OMP_PREFIX")
        if prefix_from_env is not None:
            env_include_dir = os.path.join(prefix_from_env, "include")
            if os.path.exists(os.path.join(env_include_dir, "omp.h")):
                inc_dirs.append(env_include_dir)
                link_dirs.append(os.path.join(prefix_from_env, "lib"))
                have_omp = True
            else:
                warnings.warn("environment variable `OMP_PREFIX` is invalid.")

        if not have_omp:
            link_libs.append("omp")

            # prefer to use openmp from `conda install llvm-openmp`
            conda_root = os.getenv("CONDA_PREFIX")
            if conda_root is not None:
                have_omp = is_conda_llvm_openmp_installed()
                if have_omp:
                    conda_lib_dir = os.path.join(conda_root, "lib")
                    inc_dirs.append(os.path.join(conda_root, "include"))
                    link_dirs.append(conda_lib_dir)
                    # Prefer Intel OpenMP on x86 machine
                    intel_omp = os.path.join(conda_lib_dir, "libiomp5.dylib")
                    if os.uname().machine == "x86_64" and os.path.exists(intel_omp):
                        link_libs.append("iomp5")

            # next, try to use openmp from `brew install libomp`
            if not have_omp:
                have_omp, brew_prefix = homebrew_libomp()
                if have_omp:
                    inc_dirs.append(os.path.join(brew_prefix, "include"))
                    link_dirs.append(os.path.join(brew_prefix, "lib"))

        # if openmp is still not available, we let the compiler to have a try,
        # and raise error together with instructions at compilation error later
    elif _IS_WINDOWS:
        # On Windows, `clang` and `icx` have their own openmp implementation,
        # and the openmp lib lives in some sub-directory of the compiler.
        # For dynamic library (DLL) load, the Windows native APIs are
        # `LoadLibraryA` and `LoadLibraryExA`, and their dependency search
        # follows these rules:
        # https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa#searching-for-dlls-and-dependencies
        # In some cases the rules do not include the compiler's
        # sub-directories, so the compiler's openmp library cannot be found
        # and loaded correctly, and the whole application would be broken.
        #
        # To avoid that failure, the openmp binary is located and preloaded:
        # 1. For clang, the function is `perload_clang_libomp_win`.
        # 2. For icx, the function is `perload_icx_libomp_win`.
        if _is_clang(cpp_compiler):
            compile_flags.append("openmp")
            link_libs.append("libomp")
            perload_clang_libomp_win(cpp_compiler, "libomp.dll")
        elif _is_intel_compiler(cpp_compiler):
            compile_flags.append("Qiopenmp")
            link_libs.append("libiomp5md")
            perload_icx_libomp_win(cpp_compiler)
        else:
            # /openmp, /openmp:llvm
            # llvm on Windows, new openmp: https://devblogs.microsoft.com/cppblog/msvc-openmp-update/
            # msvc openmp: https://learn.microsoft.com/zh-cn/cpp/build/reference/openmp-enable-openmp-2-0-support?view=msvc-170
            compile_flags += ["openmp", "openmp:experimental"]  # MSVC CL
    elif config.is_fbcode():
        inc_dirs.append(build_paths.openmp())

        fb_omp_lib = build_paths.openmp_lib()
        extra_args.append(f"-Wp,-fopenmp {fb_omp_lib}")

        link_libs.append("omp")
    elif _is_clang(cpp_compiler):
        # TODO: fix issue, can't find omp.h
        compile_flags.append("fopenmp")
        link_libs.append("gomp")
    elif _is_intel_compiler(cpp_compiler):
        compile_flags.append("fiopenmp")
    else:
        compile_flags.append("fopenmp")
        link_libs.append("gomp")

    return compile_flags, link_flags, inc_dirs, link_dirs, link_libs, extra_args
987
988
def get_mmap_self_macro(use_mmap_weights: bool) -> List[str]:
    """
    Return the macro list for the mmap-weights option.

    The list holds the single entry " USE_MMAP_SELF" when `use_mmap_weights`
    is truthy and is empty otherwise. A fresh list is returned on every call.
    """
    return [" USE_MMAP_SELF"] if use_mmap_weights else []
994
995
def get_cpp_torch_options(
    cpp_compiler: str,
    vec_isa: VecISA,
    include_pytorch: bool,
    aot_mode: bool,
    compile_only: bool,
    use_absolute_path: bool,
    use_mmap_weights: bool,
) -> Tuple[List[str], List[str], List[str], List[str], List[str], List[str], List[str]]:
    """
    Gather every torch related build argument into one 7-tuple.

    The pieces come from the per-topic helpers (cpp wrapper macros, custom
    generated macros, standard system libs, vector ISA, torch, python, OpenMP,
    glibcxx ABI, fb internal macros, mmap macro) and are concatenated per
    category.

    Returns, in order: definitions, include dirs, cflags, ldflags,
    library dirs, libraries, passthrough args.

    Note: `compile_only` is accepted but not consulted by this function.
    """
    wrapper_macros = _get_torch_cpp_wrapper_defination()
    custom_macros = _use_custom_generated_macros()

    std_cflags, std_include_dirs, std_passthrough = _setup_standard_sys_libs(
        cpp_compiler, aot_mode, use_absolute_path
    )

    isa_macros, isa_passthrough = _get_build_args_of_chosen_isa(vec_isa)

    torch_include_dirs, torch_lib_dirs, torch_libs = _get_torch_related_args(
        include_pytorch=include_pytorch, aot_mode=aot_mode
    )

    py_include_dirs, py_lib_dirs = _get_python_related_args()

    (
        omp_cflags,
        omp_ldflags,
        omp_include_dirs,
        omp_lib_dirs,
        omp_libs,
        omp_passthrough,
    ) = _get_openmp_args(cpp_compiler)

    abi_passthrough = _get_glibcxx_abi_build_flags()
    fb_macros = _use_fb_internal_macros()

    mmap_macros = get_mmap_self_macro(use_mmap_weights)

    return (
        # definitions
        wrapper_macros + custom_macros + isa_macros + fb_macros + mmap_macros,
        # include dirs
        std_include_dirs + py_include_dirs + torch_include_dirs + omp_include_dirs,
        # cflags
        std_cflags + omp_cflags,
        # ldflags: OpenMP is the only contributor
        omp_ldflags,
        # library dirs
        py_lib_dirs + torch_lib_dirs + omp_lib_dirs,
        # libraries
        torch_libs + omp_libs,
        # passthrough args
        std_passthrough + isa_passthrough + abi_passthrough + omp_passthrough,
    )
1079
1080
class CppTorchOptions(CppOptions):
    """
    Build options for torch code. It inherits from CppOptions, so the base
    cxx build options are already included; on top of them it appends the
    torch related build args returned by `get_cpp_torch_options`:
    1. Torch include_directories, libraries, libraries_directories.
    2. Python include_directories, libraries, libraries_directories.
    3. OpenMP related.
    4. Torch MACROs.
    5. MISC
    """

    def __init__(
        self,
        vec_isa: VecISA = invalid_vec_isa,
        include_pytorch: bool = False,
        warning_all: bool = True,
        aot_mode: bool = False,
        compile_only: bool = False,
        use_absolute_path: bool = False,
        use_mmap_weights: bool = False,
        shared: bool = True,
        extra_flags: Sequence[str] = (),
    ) -> None:
        # NOTE: `shared` is accepted but not used anywhere in this constructor.
        super().__init__(
            compile_only=compile_only,
            warning_all=warning_all,
            extra_flags=extra_flags,
            use_absolute_path=use_absolute_path,
        )

        self._aot_mode = aot_mode

        torch_options = get_cpp_torch_options(
            cpp_compiler=self._compiler,
            vec_isa=vec_isa,
            include_pytorch=include_pytorch,
            aot_mode=aot_mode,
            compile_only=compile_only,
            use_absolute_path=use_absolute_path,
            use_mmap_weights=use_mmap_weights,
        )

        # Same order as the tuple returned by get_cpp_torch_options.
        destinations = (
            self._definations,
            self._include_dirs,
            self._cflags,
            self._ldflags,
            self._libraries_dirs,
            self._libraries,
            self._passthough_args,
        )
        for destination, additions in zip(destinations, torch_options):
            _append_list(destination, additions)
        self._finalize_options()
1140
1141
def _set_gpu_runtime_env() -> None:
    """
    Point `CUDA_HOME` at `build_paths.cuda()` for fbcode CUDA builds.

    Nothing is changed unless all of these hold: the build is fbcode,
    `torch.version.hip` is None, and neither `CUDA_HOME` nor `CUDA_PATH` is
    already present in the environment.
    """
    if not config.is_fbcode():
        return
    if torch.version.hip is not None:
        return
    if "CUDA_HOME" in os.environ or "CUDA_PATH" in os.environ:
        # An explicit user setting always wins.
        return
    os.environ["CUDA_HOME"] = build_paths.cuda()
1150
1151
def _transform_cuda_paths(lpaths: List[str]) -> None:
    """
    Rewrite, in place, library paths under `CUDA_HOME` so they point at the
    directory that actually contains `libcudart_static.a`.

    This handles two cases:
    1. Meta internal cuda-12 where libs are in lib/cuda-12 and lib/cuda-12/stubs
    2. Linux machines may have CUDA installed under either lib64/ or lib/

    For every entry of `lpaths` that starts with `CUDA_HOME` and does not
    directly contain `libcudart_static.a`, the tree below it is searched; on
    the first hit the entry is replaced by the containing directory and that
    directory's `stubs` sub-directory is appended to `lpaths`. Entries for
    which nothing is found are left untouched. The function does nothing when
    `CUDA_HOME` is not set.

    Args:
        lpaths: Library search paths; modified in place.
    """
    cuda_home = os.environ.get("CUDA_HOME")
    if cuda_home is None:
        return

    # Iterate over a snapshot: the "stubs" entries appended below must not be
    # scanned again by this same loop.
    for i, path in enumerate(list(lpaths)):
        if not path.startswith(cuda_home):
            continue
        if os.path.exists(f"{path}/libcudart_static.a"):
            continue
        for root, _dirs, files in os.walk(path):
            if "libcudart_static.a" in files:
                # `root` already starts with `path`; joining the two again
                # would duplicate the prefix whenever `path` is relative.
                lpaths[i] = root
                lpaths.append(os.path.join(root, "stubs"))
                break
1167
1168
def get_cpp_torch_cuda_options(
    cuda: bool,
    aot_mode: bool = False,
    compile_only: bool = False,
) -> Tuple[List[str], List[str], List[str], List[str], List[str], List[str], List[str]]:
    """
    Collect the GPU (CUDA / ROCm) related build arguments.

    In fbcode, when neither `CUDA_HOME` nor `CUDA_PATH` is set, `CUDA_HOME` is
    first pointed at the ROCm or CUDA location from `build_paths`. Include and
    library directories are then taken from `torch.utils.cpp_extension`.

    Returns, in order: definitions, include dirs, cflags, ldflags,
    library dirs, libraries, passthrough args. cflags and ldflags are always
    empty here.
    """
    macros: List[str] = []
    compile_flags: List[str] = []
    link_flags: List[str] = []
    link_libs: List[str] = []
    extra_args: List[str] = []

    if config.is_fbcode() and not (
        "CUDA_HOME" in os.environ or "CUDA_PATH" in os.environ
    ):
        if torch.version.hip:
            os.environ["CUDA_HOME"] = build_paths.rocm()
        else:
            os.environ["CUDA_HOME"] = build_paths.cuda()

    _set_gpu_runtime_env()
    from torch.utils import cpp_extension

    header_dirs = cpp_extension.include_paths(cuda)
    lib_dirs = cpp_extension.library_paths(cuda)

    if cuda:
        if torch.version.hip:
            macros.append(" USE_ROCM")
        else:
            macros.append(" USE_CUDA")

        if torch.version.hip is None:
            if config.is_fbcode():
                link_libs.extend(["cuda"])
            else:
                link_libs.extend(["c10_cuda", "cuda", "torch_cuda"])
        else:
            if config.is_fbcode():
                link_libs.extend(["amdhip64"])
            else:
                link_libs.extend(["c10_hip", "torch_hip"])
            macros.append(" __HIP_PLATFORM_AMD__")

    if aot_mode:
        if config.is_fbcode():
            from torch._inductor.codecache import cpp_prefix_path

            header_dirs += [f"{os.path.dirname(cpp_prefix_path())}"]

        if cuda and torch.version.hip is None:
            _transform_cuda_paths(lib_dirs)

    if config.is_fbcode():
        if torch.version.hip is None:
            sdk_root = build_paths.cuda()
        else:
            sdk_root = build_paths.rocm()
        header_dirs.append(os.path.join(sdk_root, "include"))

        # Only add link args, when compile_only is false.
        if aot_mode and cuda and torch.version.hip is None and not compile_only:
            extra_args = ["-Wl,-Bstatic -lcudart_static -Wl,-Bdynamic"]

    return (
        macros,
        header_dirs,
        compile_flags,
        link_flags,
        lib_dirs,
        link_libs,
        extra_args,
    )
1242
1243
class CppTorchCudaOptions(CppTorchOptions):
    """
    Build options for torch code that targets a GPU. It inherits from
    CppTorchOptions, so the base cxx build options and the torch common build
    options are already included; on top of them it appends the device
    related build args returned by `get_cpp_torch_cuda_options`.
    """

    def __init__(
        self,
        vec_isa: VecISA = invalid_vec_isa,
        include_pytorch: bool = False,
        cuda: bool = True,
        aot_mode: bool = False,
        compile_only: bool = False,
        use_absolute_path: bool = False,
        use_mmap_weights: bool = False,
        shared: bool = True,
        extra_flags: Sequence[str] = (),
    ) -> None:
        # NOTE: `shared` is accepted but not used anywhere in this constructor.
        super().__init__(
            vec_isa=vec_isa,
            include_pytorch=include_pytorch,
            aot_mode=aot_mode,
            compile_only=compile_only,
            use_absolute_path=use_absolute_path,
            use_mmap_weights=use_mmap_weights,
            extra_flags=extra_flags,
        )

        device_options = get_cpp_torch_cuda_options(
            cuda=cuda, aot_mode=aot_mode, compile_only=compile_only
        )

        # Same order as the tuple returned by get_cpp_torch_cuda_options.
        destinations = (
            self._definations,
            self._include_dirs,
            self._cflags,
            self._ldflags,
            self._libraries_dirs,
            self._libraries,
            self._passthough_args,
        )
        for destination, additions in zip(destinations, device_options):
            _append_list(destination, additions)
        self._finalize_options()
1300
1301
def get_name_and_dir_from_output_file_path(
    file_path: str,
) -> Tuple[str, str]:
    """
    Split a file path into the pieces CppBuilder expects.

    Example:
        input_code: /tmp/tmpof1n5g7t/5c/c5crkkcdvhdxpktrmjxbqkqyq5hmxpqsfza4pxcf3mwk42lphygc.cpp
        name, dir = get_name_and_dir_from_output_file_path(input_code)
    Run result:
        name = c5crkkcdvhdxpktrmjxbqkqyq5hmxpqsfza4pxcf3mwk42lphygc
        dir = /tmp/tmpof1n5g7t/5c

    Put 'name' and 'dir' into CppBuilder's 'name' and 'output_dir'.
    CppBuilder --> get_target_file_path will format the output path according to the OS:
    Linux: /tmp/tmppu87g3mm/zh/czhwiz4z7ca7ep3qkxenxerfjxy42kehw6h5cjk6ven4qu4hql4i.so
    Windows: [Windows temp path]/tmppu87g3mm/zh/czhwiz4z7ca7ep3qkxenxerfjxy42kehw6h5cjk6ven4qu4hql4i.dll

    Args:
        file_path: Path of a file, with or without a directory part.

    Returns:
        `(name, dir)`: the file name with its last extension removed, and the
        directory part of `file_path` without a trailing separator (empty
        string when `file_path` has no directory part).
    """
    dir_path, name_and_ext = os.path.split(file_path)
    # Only the last extension is stripped: "a.tar.gz" -> "a.tar".
    name = os.path.splitext(name_and_ext)[0]
    return name, dir_path
1324
1325
class CppBuilder:
    """
    CppBuilder is a cpp jit builder, and it supports Windows, Linux and MacOS.
    Args:
        name:
            1. Build target name, the final target file will append extension type automatically.
            2. Because CppBuilder supports multiple OSes, it maintains the extension per OS.
        sources:
            Source code file list to be built.
        BuildOption:
            Build options to the builder.
        output_dir:
            1. The output_dir the target file will output to.
            2. The default value is empty string, and then the current dir is used as output dir.
            3. Final target file: output_dir/name.ext
    """

    def __get_python_module_ext(self) -> str:
        # Extension of the linked module for the current OS.
        return ".pyd" if _IS_WINDOWS else ".so"

    def __get_object_ext(self) -> str:
        # Extension of a compile-only object file for the current OS.
        return ".obj" if _IS_WINDOWS else ".o"

    def __init__(
        self,
        name: str,
        sources: Union[str, List[str]],
        BuildOption: BuildOptionsBase,
        output_dir: str = "",
    ) -> None:
        """
        Read everything needed from `BuildOption` and pre-render each option
        category into the command line fragment used by `get_command_line`.
        """
        self._name = name
        self._output_dir = output_dir

        self._compiler = BuildOption.get_compiler()
        self._use_absolute_path: bool = BuildOption.get_use_absolute_path()
        self._aot_mode: bool = BuildOption.get_aot_mode()
        self._compile_only = BuildOption.get_compile_only()

        # Compile-only builds produce an object file, otherwise a module.
        if self._compile_only:
            file_ext = self.__get_object_ext()
        else:
            file_ext = self.__get_python_module_ext()
        self._target_file = os.path.join(self._output_dir, f"{self._name}{file_ext}")

        if isinstance(sources, str):
            sources = [sources]

        if config.is_fbcode():
            if self._aot_mode and not self._use_absolute_path:
                inp_name = sources
                # output process @ get_name_and_dir_from_output_file_path
            else:
                # We need to copy any absolute-path torch includes
                inp_name = [os.path.basename(i) for i in sources]
                self._target_file = os.path.basename(self._target_file)

            self._sources_args = " ".join(inp_name)
        else:
            self._sources_args = " ".join(sources)

        # Every rendered fragment keeps a trailing space after each item so
        # the fragments can simply be placed next to each other.
        flag_prefix = "/" if _IS_WINDOWS else "-"

        self._cflags_args = "".join(
            f"{flag_prefix}{cflag} " for cflag in BuildOption.get_cflags()
        )
        self._definations_args = "".join(
            f"{flag_prefix}D {defination} "
            for defination in BuildOption.get_definations()
        )
        if _IS_WINDOWS:
            self._include_dirs_args = "".join(
                f"/I {inc_dir} " for inc_dir in BuildOption.get_include_dirs()
            )
        else:
            self._include_dirs_args = "".join(
                f"-I{inc_dir} " for inc_dir in BuildOption.get_include_dirs()
            )
        self._ldflags_args = "".join(
            f"{flag_prefix}{ldflag} " for ldflag in BuildOption.get_ldflags()
        )
        if _IS_WINDOWS:
            self._libraries_dirs_args = "".join(
                f'/LIBPATH:"{lib_dir}" '
                for lib_dir in BuildOption.get_libraries_dirs()
            )
            self._libraries_args = "".join(
                f'"{lib}.lib" ' for lib in BuildOption.get_libraries()
            )
        else:
            self._libraries_dirs_args = "".join(
                f"-L{lib_dir} " for lib_dir in BuildOption.get_libraries_dirs()
            )
            self._libraries_args = "".join(
                f"-l{lib} " for lib in BuildOption.get_libraries()
            )
        self._passthough_parameters_args = "".join(
            f"{passthough_arg} " for passthough_arg in BuildOption.get_passthough_args()
        )

    def get_command_line(self) -> str:
        """
        Assemble the full compiler command line as a single string.

        On Windows the command is passed through `normalize_path_separator`.
        Elsewhere, runs of spaces/newlines are collapsed to one space and
        `-c` is added for compile-only builds.
        """
        if _IS_WINDOWS:
            # https://learn.microsoft.com/en-us/cpp/build/walkthrough-compile-a-c-program-on-the-command-line?view=msvc-1704
            # https://stackoverflow.com/a/31566153
            cmd = (
                f"{self._compiler} {self._include_dirs_args} {self._definations_args} "
                f"{self._cflags_args} {self._sources_args} "
                f"{self._passthough_parameters_args} /LD /Fe{self._target_file} "
                f"/link {self._libraries_dirs_args} {self._libraries_args} "
                f"{self._ldflags_args} "
            )
            return normalize_path_separator(cmd)

        compile_only_arg = "-c" if self._compile_only else ""
        raw_cmd = (
            f"{self._compiler} {self._sources_args} {self._definations_args} "
            f"{self._cflags_args} {self._include_dirs_args} "
            f"{self._passthough_parameters_args} {self._ldflags_args} "
            f"{self._libraries_args} {self._libraries_dirs_args} "
            f"{compile_only_arg} -o {self._target_file}"
        )
        # Empty fragments and trailing spaces leave runs of blanks behind.
        return re.sub(r"[ \n]+", " ", raw_cmd).strip()

    def get_target_file_path(self) -> str:
        """Return the target file path after `normalize_path_separator`."""
        return normalize_path_separator(self._target_file)

    def build(self) -> Tuple[bytes, str]:
        """
        Run the build and return `(status, target_file)`.

        A temporary directory is required to store object files on Windows.
        It is created under the output directory, used as the working
        directory of the compile command, and deleted afterwards to save disk
        space, including when the compile command raises.
        """
        _create_if_dir_not_exist(self._output_dir)
        build_tmp_dir = os.path.join(
            self._output_dir, f"{self._name}_{_BUILD_TEMP_DIR}"
        )
        _create_if_dir_not_exist(build_tmp_dir)

        try:
            status = run_compile_cmd(self.get_command_line(), cwd=build_tmp_dir)
        finally:
            # Clean up on failure too, so failed builds do not pile up
            # temporary directories next to the outputs.
            _remove_dir(build_tmp_dir)
        return status, self._target_file
1512