"""Repository rule for NCCL configuration.

`nccl_configure` depends on the following environment variables:

  * `TF_NCCL_VERSION`: Installed NCCL version or empty to build from source.
  * `NCCL_INSTALL_PATH` (deprecated): The installation path of the NCCL library.
  * `NCCL_HDR_PATH` (deprecated): The installation path of the NCCL header
    files.
  * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
    `/usr/local/cuda,usr/`.
  * `TF_NCCL_CONFIG_REPO`: If set, the `BUILD` file (and `build_defs.bzl`, when
    `TF_NCCL_VERSION` is empty) are taken from this configuration repository
    instead of being generated from the local machine.

"""

load(
    "//third_party/gpus:cuda_configure.bzl",
    "enable_cuda",
    "find_cuda_config",
)
load(
    "//third_party/remote_config:common.bzl",
    "config_repo_label",
    "get_cpu_value",
    "get_host_environ",
)

_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_NCCL_HDR_PATH = "NCCL_HDR_PATH"
_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NEED_CUDA = "TF_NEED_CUDA"

_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR"
_DEFINE_NCCL_MINOR = "#define NCCL_MINOR"
_DEFINE_NCCL_PATCH = "#define NCCL_PATCH"

# BUILD file written when NCCL is not configured: empty targets with the same
# names as the real ones.
_NCCL_DUMMY_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  visibility = ["//visibility:public"],
)

cc_library(
  name = "nccl",
  visibility = ["//visibility:public"],
)
"""

# BUILD file written when NCCL is built from source: forwards both targets to
# @nccl_archive.
_NCCL_ARCHIVE_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  data = ["@nccl_archive//:LICENSE.txt"],
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl",
  actual = "@nccl_archive//:nccl",
  visibility = ["//visibility:public"],
)
"""

def _label(file):
    """Returns the Label of `file` inside the `//third_party/nccl` package.

    Args:
      file: File name relative to `//third_party/nccl`.

    Returns:
      A `Label` pointing at `//third_party/nccl:<file>`.
    """
    return Label("//third_party/nccl:{}".format(file))

def _create_local_nccl_repository(repository_ctx):
    """Populates the repository by inspecting the machine running the rule.

    When `TF_NCCL_VERSION` is empty, writes a `BUILD` file aliasing
    `@nccl_archive` plus a `build_defs.bzl` carrying the detected CUDA version.
    Otherwise, writes a `BUILD` file from `system.BUILD.tpl` describing the
    NCCL installation reported by `find_cuda_config`.

    Args:
      repository_ctx: The repository context of the rule being evaluated.
    """

    # Resolve all labels before doing any real work. Resolving causes the
    # function to be restarted with all previous state being lost. This
    # can easily lead to an O(n^2) runtime in the number of labels.
    # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778
    find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64"))

    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version:
        # Only the leading (major) component is kept.
        nccl_version = nccl_version.split(".")[0]

    cuda_config = find_cuda_config(repository_ctx, find_cuda_config_path, ["cuda"])
    cuda_version = cuda_config["cuda_version"].split(".")

    if nccl_version == "":
        # Alias to open source build from @nccl_archive.
        repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)

        # The template takes exactly (major, minor). Any further version
        # components are dropped so the two-slot format string cannot fail
        # on a value such as "x.y.z".
        repository_ctx.template(
            "build_defs.bzl",
            _label("build_defs.bzl.tpl"),
            {"%{cuda_version}": "(%s, %s)" % tuple(cuda_version[:2])},
        )
    else:
        # Create target for locally installed NCCL.
        config = find_cuda_config(repository_ctx, find_cuda_config_path, ["nccl"])
        config_wrap = {
            "%{nccl_version}": config["nccl_version"],
            "%{nccl_header_dir}": config["nccl_include_dir"],
            "%{nccl_library_dir}": config["nccl_library_dir"],
        }
        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), config_wrap)

def _create_remote_nccl_repository(repository_ctx, remote_config_repo):
    """Populates the repository from files in a configuration repository.

    Always instantiates `BUILD` from `remote_config_repo`. When
    `TF_NCCL_VERSION` is empty, `build_defs.bzl` is taken from there as well.
    Both files are instantiated without substitutions.

    Args:
      repository_ctx: The repository context of the rule being evaluated.
      remote_config_repo: Configuration repository, passed to
        `config_repo_label` to build the labels of the files to use.
    """
    repository_ctx.template(
        "BUILD",
        config_repo_label(remote_config_repo, ":BUILD"),
        {},
    )

    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version == "":
        repository_ctx.template(
            "build_defs.bzl",
            config_repo_label(remote_config_repo, ":build_defs.bzl"),
            {},
        )

def _nccl_autoconf_impl(repository_ctx):
    """Implementation of the `nccl_configure` repository rule.

    Writes a dummy `BUILD` file when CUDA is disabled or the host CPU value is
    neither "Linux" nor "FreeBSD". Otherwise uses the configuration repository
    named by `TF_NCCL_CONFIG_REPO` if that variable is set, and falls back to
    local detection if it is not.

    Args:
      repository_ctx: The repository context of the rule being evaluated.
    """
    if (not enable_cuda(repository_ctx) or
        get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")):
        # Add a dummy build file to make bazel query happy.
        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
        return

    # Read the variable once and reuse the value for both the check and the call.
    remote_config_repo = get_host_environ(repository_ctx, _TF_NCCL_CONFIG_REPO)
    if remote_config_repo != None:
        _create_remote_nccl_repository(repository_ctx, remote_config_repo)
    else:
        _create_local_nccl_repository(repository_ctx)

_ENVIRONS = [
    _CUDA_TOOLKIT_PATH,
    _NCCL_HDR_PATH,
    _NCCL_INSTALL_PATH,
    _TF_NCCL_VERSION,
    _TF_CUDA_COMPUTE_CAPABILITIES,
    _TF_NEED_CUDA,
    "TF_CUDA_PATHS",
]

# NOTE(review): the `environ` attribute is presumably what `get_host_environ`
# consults for this rule -- confirm in //third_party/remote_config:common.bzl.
remote_nccl_configure = repository_rule(
    implementation = _create_local_nccl_repository,
    environ = _ENVIRONS,
    remotable = True,
    attrs = {
        "environ": attr.string_dict(),
    },
)

nccl_configure = repository_rule(
    implementation = _nccl_autoconf_impl,
    # TF_NCCL_CONFIG_REPO is read by _nccl_autoconf_impl, so it is declared
    # here so that changing it invalidates the repository.
    environ = _ENVIRONS + [_TF_NCCL_CONFIG_REPO],
)
"""Detects and configures the NCCL configuration.

Add the following to your WORKSPACE file:

```python
nccl_configure(name = "local_config_nccl")
```

Args:
  name: A unique name for this workspace rule.
"""