# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.

# NOTE(review): `cuda_library` is not referenced by any target visible in this
# file -- confirm nothing else relies on it before dropping this load.
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load(
    "@local_config_nccl//:build_defs.bzl",
    "cuda_rdc_library",
    "gen_device_srcs",
)

licenses(["notice"])

exports_files(["LICENSE.txt"])

# Transport sources built by ":net". The same list is excluded from the
# ":nccl" source glob, so the two targets never compile the same file.
_NET_SRCS = [
    "src/transport/coll_net.cc",
    "src/transport/net.cc",
]

# Linker options shared by ":net" and ":nccl": link against librt everywhere
# except on macOS, where no extra option is passed.
_RT_LINKOPTS = select({
    "@org_tensorflow//tensorflow:macos": [],
    "//conditions:default": ["-lrt"],
})

# Headers made available relative to "src" (i.e. "nccl.h" and
# "include/collectives.h").
cc_library(
    name = "src_hdrs",
    hdrs = [
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    strip_include_prefix = "src",
)

# Everything under src/include, made available relative to that directory.
cc_library(
    name = "include_hdrs",
    hdrs = glob(["src/include/**"]),
    strip_include_prefix = "src/include",
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)

# Device-side collective headers, made available relative to
# src/collectives/device.
cc_library(
    name = "device_hdrs",
    hdrs = glob(["src/collectives/device/*.h"]),
    strip_include_prefix = "src/collectives/device",
)

# NCCL compiles the same source files with different NCCL_OP/NCCL_TYPE defines.
# RDC compilation requires that each compiled module has a unique ID. Clang
# derives the module ID from the path only so we need to copy the files to get
# different IDs for different parts of compilation. NVCC does not have that
# problem because it generates IDs based on preprocessed content.
gen_device_srcs(
    name = "device_srcs",
    srcs = [
        "src/collectives/device/all_gather.cu.cc",
        "src/collectives/device/all_reduce.cu.cc",
        "src/collectives/device/broadcast.cu.cc",
        "src/collectives/device/reduce.cu.cc",
        "src/collectives/device/reduce_scatter.cu.cc",
        "src/collectives/device/sendrecv.cu.cc",
    ],
)

# Device library: the hand-listed device sources plus the generated per-op
# copies from ":device_srcs".
cuda_rdc_library(
    name = "device",
    srcs = [
        "src/collectives/device/functions.cu.cc",
        "src/collectives/device/onerank_reduce.cu.cc",
        ":device_srcs",
    ] + glob([
        # Required for header inclusion checking; see the comment on the
        # ":nccl" target's srcs for details.
        "src/collectives/device/*.h",
        "src/nccl.h",
    ]),
    deps = [
        ":device_hdrs",
        ":include_hdrs",
        ":src_hdrs",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)

# Network transport sources, built as their own library and linked into
# ":nccl" through its deps.
cc_library(
    name = "net",
    srcs = _NET_SRCS,
    linkopts = _RT_LINKOPTS,
    deps = [
        ":include_hdrs",
        ":src_hdrs",
    ],
)

# Public NCCL library. Exposes src/nccl.h as "third_party/nccl/nccl.h".
cc_library(
    name = "nccl",
    srcs = glob(
        include = [
            "src/**/*.cc",
            # Required for header inclusion checking, see below for details.
            "src/graph/*.h",
        ],
        # Exclude device-library code (built by ":device") and the transport
        # sources built by ":net".
        exclude = ["src/collectives/device/**"] + _NET_SRCS,
    ) + [
        # Required for header inclusion checking (see
        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
        # Files in src/ which #include "nccl.h" load it from there rather than
        # from the virtual includes directory.
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    hdrs = ["src/nccl.h"],
    include_prefix = "third_party/nccl",
    linkopts = _RT_LINKOPTS,
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [
        ":device",
        ":include_hdrs",
        ":net",
        ":src_hdrs",
    ],
)