# xref: /aosp_15_r20/external/tensorflow/third_party/nccl/archive.BUILD (revision b6fb3261f9314811a0f4371741dbb8839866f948)
# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.
3
# Starlark helpers used by the targets in this file. load() statements are
# placed first, as the Bazel BUILD style guide recommends.
# NOTE(review): cuda_library is not referenced in the visible part of this
# file -- confirm nothing else needs it before dropping the load.
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load(
    "@local_config_nccl//:build_defs.bzl",
    "cuda_rdc_library",
    "gen_device_srcs",
)

# Default license type for the targets in this package.
licenses(["notice"])

# Make the license text addressable from other packages.
exports_files(["LICENSE.txt"])
14
# Headers exposed with the leading "src/" stripped, i.e. includable as
# "nccl.h" and "include/collectives.h".
cc_library(
    name = "src_hdrs",
    hdrs = [
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    strip_include_prefix = "src",
)
23
# Every file under src/include, includable relative to that directory
# (for example "collectives.h"). Depends on the CUDA headers.
cc_library(
    name = "include_hdrs",
    hdrs = glob(["src/include/**"]),
    strip_include_prefix = "src/include",
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)
30
# Headers that live next to the device collectives, includable by bare file
# name (the "src/collectives/device" prefix is stripped).
cc_library(
    name = "device_hdrs",
    hdrs = glob(["src/collectives/device/*.h"]),
    strip_include_prefix = "src/collectives/device",
)
36
# NCCL compiles the same source files with different NCCL_OP/NCCL_TYPE defines.
# RDC compilation requires that each compiled module has a unique ID. Clang
# derives the module ID from the path only, so the files need to be copied to
# get different IDs for different parts of the compilation. NVCC does not have
# that problem because it generates IDs based on preprocessed content.
gen_device_srcs(
    name = "device_srcs",
    srcs = [
        "src/collectives/device/all_gather.cu.cc",
        "src/collectives/device/all_reduce.cu.cc",
        "src/collectives/device/broadcast.cu.cc",
        "src/collectives/device/reduce.cu.cc",
        "src/collectives/device/reduce_scatter.cu.cc",
        "src/collectives/device/sendrecv.cu.cc",
    ],
)
53
# Device-side library: two hand-listed .cu.cc files plus the sources produced
# by :device_srcs, built through the cuda_rdc_library macro.
cuda_rdc_library(
    name = "device",
    srcs = [
        "src/collectives/device/functions.cu.cc",
        "src/collectives/device/onerank_reduce.cu.cc",
        ":device_srcs",
    ] + glob([
        # Required for header inclusion checking; the comment on the srcs of
        # the :nccl target further down in this file has the details.
        "src/collectives/device/*.h",
        "src/nccl.h",
    ]),
    deps = [
        ":device_hdrs",
        ":include_hdrs",
        ":src_hdrs",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)
72
# The two network transport sources. The :nccl target excludes these files
# from its own glob and depends on this library instead.
cc_library(
    name = "net",
    srcs = [
        "src/transport/coll_net.cc",
        "src/transport/net.cc",
    ],
    # Pass -lrt to the linker on every platform except macOS.
    linkopts = select({
        "@org_tensorflow//tensorflow:macos": [],
        "//conditions:default": ["-lrt"],
    }),
    deps = [
        ":include_hdrs",
        ":src_hdrs",
    ],
)
88
# Public NCCL library. With strip_include_prefix = "src" and
# include_prefix = "third_party/nccl", the exported header is includable as
# "third_party/nccl/nccl.h".
cc_library(
    name = "nccl",
    srcs = glob(
        include = [
            "src/**/*.cc",
            # Required for header inclusion checking, see below for details.
            "src/graph/*.h",
        ],
        # Exclude device-library code (built by :device) and the transport
        # sources that are compiled by :net.
        exclude = [
            "src/collectives/device/**",
            "src/transport/coll_net.cc",
            "src/transport/net.cc",
        ],
    ) + [
        # Required for header inclusion checking (see
        # https://bazel.build/reference/be/c-cpp#cc_library.hdrs).
        # Files in src/ which #include "nccl.h" load it from there rather than
        # from the virtual includes directory.
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    hdrs = ["src/nccl.h"],
    include_prefix = "third_party/nccl",
    # Pass -lrt to the linker on every platform except macOS.
    linkopts = select({
        "@org_tensorflow//tensorflow:macos": [],
        "//conditions:default": ["-lrt"],
    }),
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [
        ":device",
        ":include_hdrs",
        ":net",
        ":src_hdrs",
    ],
)
126