1*da0073e9SAndroid Build Coastguard Worker #include <c10/cuda/CUDAAllocatorConfig.h>
2*da0073e9SAndroid Build Coastguard Worker #include <c10/cuda/CUDACachingAllocator.h>
3*da0073e9SAndroid Build Coastguard Worker #include <c10/util/llvmMathExtras.h>
4*da0073e9SAndroid Build Coastguard Worker
5*da0073e9SAndroid Build Coastguard Worker #if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
6*da0073e9SAndroid Build Coastguard Worker #include <c10/cuda/driver_api.h>
7*da0073e9SAndroid Build Coastguard Worker #endif
8*da0073e9SAndroid Build Coastguard Worker
9*da0073e9SAndroid Build Coastguard Worker namespace c10::cuda::CUDACachingAllocator {
10*da0073e9SAndroid Build Coastguard Worker
// Number of power-of-two size intervals tracked by
// m_roundup_power2_divisions (the [1MB, 64GB) range in power-of-two steps;
// see roundup_power2_divisions() below, which checks this count).
constexpr size_t kRoundUpPowerOfTwoIntervals = 16;
12*da0073e9SAndroid Build Coastguard Worker
// Initialize every allocator tunable to its built-in default; parseArgs()
// may later override these from a configuration string.
CUDAAllocatorConfig::CUDAAllocatorConfig()
    : m_max_split_size(std::numeric_limits<size_t>::max()), // no split cap
      m_garbage_collection_threshold(0), // 0 disables GC
      m_pinned_num_register_threads(1),
      m_expandable_segments(false),
      m_release_lock_on_cudamalloc(false),
      m_pinned_use_cuda_host_register(false),
      m_last_allocator_settings("") {
  // One divisor slot per power-of-two size interval; 0 disables rounding.
  m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0);
}
23*da0073e9SAndroid Build Coastguard Worker
roundup_power2_divisions(size_t size)24*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::roundup_power2_divisions(size_t size) {
25*da0073e9SAndroid Build Coastguard Worker size_t log_size = (63 - llvm::countLeadingZeros(size));
26*da0073e9SAndroid Build Coastguard Worker
27*da0073e9SAndroid Build Coastguard Worker // Our intervals start at 1MB and end at 64GB
28*da0073e9SAndroid Build Coastguard Worker const size_t interval_start =
29*da0073e9SAndroid Build Coastguard Worker 63 - llvm::countLeadingZeros(static_cast<size_t>(1048576));
30*da0073e9SAndroid Build Coastguard Worker const size_t interval_end =
31*da0073e9SAndroid Build Coastguard Worker 63 - llvm::countLeadingZeros(static_cast<size_t>(68719476736));
32*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
33*da0073e9SAndroid Build Coastguard Worker (interval_end - interval_start == kRoundUpPowerOfTwoIntervals),
34*da0073e9SAndroid Build Coastguard Worker "kRoundUpPowerOfTwoIntervals mismatch");
35*da0073e9SAndroid Build Coastguard Worker
36*da0073e9SAndroid Build Coastguard Worker int index = static_cast<int>(log_size) - static_cast<int>(interval_start);
37*da0073e9SAndroid Build Coastguard Worker
38*da0073e9SAndroid Build Coastguard Worker index = std::max(0, index);
39*da0073e9SAndroid Build Coastguard Worker index = std::min(index, static_cast<int>(kRoundUpPowerOfTwoIntervals) - 1);
40*da0073e9SAndroid Build Coastguard Worker return instance().m_roundup_power2_divisions[index];
41*da0073e9SAndroid Build Coastguard Worker }
42*da0073e9SAndroid Build Coastguard Worker
lexArgs(const char * env,std::vector<std::string> & config)43*da0073e9SAndroid Build Coastguard Worker void CUDAAllocatorConfig::lexArgs(
44*da0073e9SAndroid Build Coastguard Worker const char* env,
45*da0073e9SAndroid Build Coastguard Worker std::vector<std::string>& config) {
46*da0073e9SAndroid Build Coastguard Worker std::vector<char> buf;
47*da0073e9SAndroid Build Coastguard Worker
48*da0073e9SAndroid Build Coastguard Worker size_t env_length = strlen(env);
49*da0073e9SAndroid Build Coastguard Worker for (size_t i = 0; i < env_length; i++) {
50*da0073e9SAndroid Build Coastguard Worker if (env[i] == ',' || env[i] == ':' || env[i] == '[' || env[i] == ']') {
51*da0073e9SAndroid Build Coastguard Worker if (!buf.empty()) {
52*da0073e9SAndroid Build Coastguard Worker config.emplace_back(buf.begin(), buf.end());
53*da0073e9SAndroid Build Coastguard Worker buf.clear();
54*da0073e9SAndroid Build Coastguard Worker }
55*da0073e9SAndroid Build Coastguard Worker config.emplace_back(1, env[i]);
56*da0073e9SAndroid Build Coastguard Worker } else if (env[i] != ' ') {
57*da0073e9SAndroid Build Coastguard Worker buf.emplace_back(static_cast<char>(env[i]));
58*da0073e9SAndroid Build Coastguard Worker }
59*da0073e9SAndroid Build Coastguard Worker }
60*da0073e9SAndroid Build Coastguard Worker if (!buf.empty()) {
61*da0073e9SAndroid Build Coastguard Worker config.emplace_back(buf.begin(), buf.end());
62*da0073e9SAndroid Build Coastguard Worker }
63*da0073e9SAndroid Build Coastguard Worker }
64*da0073e9SAndroid Build Coastguard Worker
consumeToken(const std::vector<std::string> & config,size_t i,const char c)65*da0073e9SAndroid Build Coastguard Worker void CUDAAllocatorConfig::consumeToken(
66*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
67*da0073e9SAndroid Build Coastguard Worker size_t i,
68*da0073e9SAndroid Build Coastguard Worker const char c) {
69*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
70*da0073e9SAndroid Build Coastguard Worker i < config.size() && config[i] == std::string(1, c),
71*da0073e9SAndroid Build Coastguard Worker "Error parsing CachingAllocator settings, expected ",
72*da0073e9SAndroid Build Coastguard Worker c,
73*da0073e9SAndroid Build Coastguard Worker "");
74*da0073e9SAndroid Build Coastguard Worker }
75*da0073e9SAndroid Build Coastguard Worker
parseMaxSplitSize(const std::vector<std::string> & config,size_t i)76*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::parseMaxSplitSize(
77*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
78*da0073e9SAndroid Build Coastguard Worker size_t i) {
79*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
80*da0073e9SAndroid Build Coastguard Worker constexpr int mb = 1024 * 1024;
81*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
82*da0073e9SAndroid Build Coastguard Worker size_t val1 = stoi(config[i]);
83*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
84*da0073e9SAndroid Build Coastguard Worker val1 > kLargeBuffer / mb,
85*da0073e9SAndroid Build Coastguard Worker "CachingAllocator option max_split_size_mb too small, must be > ",
86*da0073e9SAndroid Build Coastguard Worker kLargeBuffer / mb,
87*da0073e9SAndroid Build Coastguard Worker "");
88*da0073e9SAndroid Build Coastguard Worker val1 = std::max(val1, kLargeBuffer / mb);
89*da0073e9SAndroid Build Coastguard Worker val1 = std::min(val1, (std::numeric_limits<size_t>::max() / mb));
90*da0073e9SAndroid Build Coastguard Worker m_max_split_size = val1 * 1024 * 1024;
91*da0073e9SAndroid Build Coastguard Worker } else {
92*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(false, "Error, expecting max_split_size_mb value", "");
93*da0073e9SAndroid Build Coastguard Worker }
94*da0073e9SAndroid Build Coastguard Worker return i;
95*da0073e9SAndroid Build Coastguard Worker }
96*da0073e9SAndroid Build Coastguard Worker
parseGarbageCollectionThreshold(const std::vector<std::string> & config,size_t i)97*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::parseGarbageCollectionThreshold(
98*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
99*da0073e9SAndroid Build Coastguard Worker size_t i) {
100*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
101*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
102*da0073e9SAndroid Build Coastguard Worker double val1 = stod(config[i]);
103*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
104*da0073e9SAndroid Build Coastguard Worker val1 > 0, "garbage_collect_threshold too small, set it 0.0~1.0", "");
105*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
106*da0073e9SAndroid Build Coastguard Worker val1 < 1.0, "garbage_collect_threshold too big, set it 0.0~1.0", "");
107*da0073e9SAndroid Build Coastguard Worker m_garbage_collection_threshold = val1;
108*da0073e9SAndroid Build Coastguard Worker } else {
109*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
110*da0073e9SAndroid Build Coastguard Worker false, "Error, expecting garbage_collection_threshold value", "");
111*da0073e9SAndroid Build Coastguard Worker }
112*da0073e9SAndroid Build Coastguard Worker return i;
113*da0073e9SAndroid Build Coastguard Worker }
114*da0073e9SAndroid Build Coastguard Worker
parseRoundUpPower2Divisions(const std::vector<std::string> & config,size_t i)115*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::parseRoundUpPower2Divisions(
116*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
117*da0073e9SAndroid Build Coastguard Worker size_t i) {
118*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
119*da0073e9SAndroid Build Coastguard Worker bool first_value = true;
120*da0073e9SAndroid Build Coastguard Worker
121*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
122*da0073e9SAndroid Build Coastguard Worker if (std::string_view(config[i]) == "[") {
123*da0073e9SAndroid Build Coastguard Worker size_t last_index = 0;
124*da0073e9SAndroid Build Coastguard Worker while (++i < config.size() && std::string_view(config[i]) != "]") {
125*da0073e9SAndroid Build Coastguard Worker const std::string& val1 = config[i];
126*da0073e9SAndroid Build Coastguard Worker size_t val2 = 0;
127*da0073e9SAndroid Build Coastguard Worker
128*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
129*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
130*da0073e9SAndroid Build Coastguard Worker val2 = stoi(config[i]);
131*da0073e9SAndroid Build Coastguard Worker } else {
132*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
133*da0073e9SAndroid Build Coastguard Worker false, "Error parsing roundup_power2_divisions value", "");
134*da0073e9SAndroid Build Coastguard Worker }
135*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
136*da0073e9SAndroid Build Coastguard Worker val2 == 0 || llvm::isPowerOf2_64(val2),
137*da0073e9SAndroid Build Coastguard Worker "For roundups, the divisons has to be power of 2 or 0 to disable roundup ",
138*da0073e9SAndroid Build Coastguard Worker "");
139*da0073e9SAndroid Build Coastguard Worker
140*da0073e9SAndroid Build Coastguard Worker if (std::string_view(val1) == ">") {
141*da0073e9SAndroid Build Coastguard Worker std::fill(
142*da0073e9SAndroid Build Coastguard Worker std::next(
143*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.begin(),
144*da0073e9SAndroid Build Coastguard Worker static_cast<std::vector<unsigned long>::difference_type>(
145*da0073e9SAndroid Build Coastguard Worker last_index)),
146*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.end(),
147*da0073e9SAndroid Build Coastguard Worker val2);
148*da0073e9SAndroid Build Coastguard Worker } else {
149*da0073e9SAndroid Build Coastguard Worker size_t val1_long = stoul(val1);
150*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
151*da0073e9SAndroid Build Coastguard Worker llvm::isPowerOf2_64(val1_long),
152*da0073e9SAndroid Build Coastguard Worker "For roundups, the intervals have to be power of 2 ",
153*da0073e9SAndroid Build Coastguard Worker "");
154*da0073e9SAndroid Build Coastguard Worker
155*da0073e9SAndroid Build Coastguard Worker size_t index = 63 - llvm::countLeadingZeros(val1_long);
156*da0073e9SAndroid Build Coastguard Worker index = std::max((size_t)0, index);
157*da0073e9SAndroid Build Coastguard Worker index = std::min(index, m_roundup_power2_divisions.size() - 1);
158*da0073e9SAndroid Build Coastguard Worker
159*da0073e9SAndroid Build Coastguard Worker if (first_value) {
160*da0073e9SAndroid Build Coastguard Worker std::fill(
161*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.begin(),
162*da0073e9SAndroid Build Coastguard Worker std::next(
163*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.begin(),
164*da0073e9SAndroid Build Coastguard Worker static_cast<std::vector<unsigned long>::difference_type>(
165*da0073e9SAndroid Build Coastguard Worker index)),
166*da0073e9SAndroid Build Coastguard Worker val2);
167*da0073e9SAndroid Build Coastguard Worker first_value = false;
168*da0073e9SAndroid Build Coastguard Worker }
169*da0073e9SAndroid Build Coastguard Worker if (index < m_roundup_power2_divisions.size()) {
170*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions[index] = val2;
171*da0073e9SAndroid Build Coastguard Worker }
172*da0073e9SAndroid Build Coastguard Worker last_index = index;
173*da0073e9SAndroid Build Coastguard Worker }
174*da0073e9SAndroid Build Coastguard Worker
175*da0073e9SAndroid Build Coastguard Worker if (std::string_view(config[i + 1]) != "]") {
176*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ',');
177*da0073e9SAndroid Build Coastguard Worker }
178*da0073e9SAndroid Build Coastguard Worker }
179*da0073e9SAndroid Build Coastguard Worker } else { // Keep this for backwards compatibility
180*da0073e9SAndroid Build Coastguard Worker size_t val1 = stoi(config[i]);
181*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
182*da0073e9SAndroid Build Coastguard Worker llvm::isPowerOf2_64(val1),
183*da0073e9SAndroid Build Coastguard Worker "For roundups, the divisons has to be power of 2 ",
184*da0073e9SAndroid Build Coastguard Worker "");
185*da0073e9SAndroid Build Coastguard Worker std::fill(
186*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.begin(),
187*da0073e9SAndroid Build Coastguard Worker m_roundup_power2_divisions.end(),
188*da0073e9SAndroid Build Coastguard Worker val1);
189*da0073e9SAndroid Build Coastguard Worker }
190*da0073e9SAndroid Build Coastguard Worker } else {
191*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(false, "Error, expecting roundup_power2_divisions value", "");
192*da0073e9SAndroid Build Coastguard Worker }
193*da0073e9SAndroid Build Coastguard Worker return i;
194*da0073e9SAndroid Build Coastguard Worker }
195*da0073e9SAndroid Build Coastguard Worker
// Parse "backend:native|cudaMallocAsync" starting at option token `i`.
// Sets `used_cudaMallocAsync` accordingly and validates that the
// cudaMallocAsync backend is usable with the CUDA toolkit/driver of this
// build. Returns the index of the last token consumed.
size_t CUDAAllocatorConfig::parseAllocatorConfig(
    const std::vector<std::string>& config,
    size_t i,
    bool& used_cudaMallocAsync) {
  consumeToken(config, ++i, ':');
  if (++i < config.size()) {
    TORCH_CHECK(
        ((config[i] == "native") || (config[i] == "cudaMallocAsync")),
        "Unknown allocator backend, "
        "options are native and cudaMallocAsync");
    used_cudaMallocAsync = (config[i] == "cudaMallocAsync");
#ifndef USE_ROCM
    // HIP supports hipMallocAsync and does not need to check versions
    if (used_cudaMallocAsync) {
#if CUDA_VERSION >= 11040
      // Built against a new-enough toolkit; still verify at run time that
      // the installed driver also supports the 11.4 runtime.
      int version = 0;
      C10_CUDA_CHECK(cudaDriverGetVersion(&version));
      TORCH_CHECK(
          version >= 11040,
          "backend:cudaMallocAsync requires CUDA runtime "
          "11.4 or newer, but cudaDriverGetVersion returned ",
          version);
#else
      // Toolkit too old at compile time: cudaMallocAsync cannot be used.
      TORCH_CHECK(
          false,
          "backend:cudaMallocAsync requires PyTorch to be built with "
          "CUDA 11.4 or newer, but CUDA_VERSION is ",
          CUDA_VERSION);
#endif
    }
#endif
    // The backend was already selected at load time; a different value
    // parsed now cannot take effect, so a mismatch is an internal error.
    TORCH_INTERNAL_ASSERT(
        config[i] == get()->name(),
        "Allocator backend parsed at runtime != "
        "allocator backend parsed at load time");
  } else {
    TORCH_CHECK(false, "Error parsing backend value", "");
  }
  return i;
}
236*da0073e9SAndroid Build Coastguard Worker
// Parse a full allocator configuration string: a comma-separated list of
// option:value pairs (tokenized by lexArgs). All tunables are reset to
// their defaults first, so a nullptr `env` restores default behavior.
void CUDAAllocatorConfig::parseArgs(const char* env) {
  // If empty, set the default values
  m_max_split_size = std::numeric_limits<size_t>::max();
  m_roundup_power2_divisions.assign(kRoundUpPowerOfTwoIntervals, 0);
  m_garbage_collection_threshold = 0;
  bool used_cudaMallocAsync = false;
  bool used_native_specific_option = false;

  if (env == nullptr) {
    return;
  }
  {
    // Remember the raw settings string for later introspection.
    std::lock_guard<std::mutex> lock(m_last_allocator_settings_mutex);
    m_last_allocator_settings = env;
  }

  std::vector<std::string> config;
  lexArgs(env, config);

  // Each parse* helper returns the index of the last token it consumed,
  // so `i` jumps forward past every option's value(s).
  for (size_t i = 0; i < config.size(); i++) {
    std::string_view config_item_view(config[i]);
    if (config_item_view == "max_split_size_mb") {
      i = parseMaxSplitSize(config, i);
      used_native_specific_option = true;
    } else if (config_item_view == "garbage_collection_threshold") {
      i = parseGarbageCollectionThreshold(config, i);
      used_native_specific_option = true;
    } else if (config_item_view == "roundup_power2_divisions") {
      i = parseRoundUpPower2Divisions(config, i);
      used_native_specific_option = true;
    } else if (config_item_view == "backend") {
      i = parseAllocatorConfig(config, i, used_cudaMallocAsync);
    } else if (config_item_view == "expandable_segments") {
      used_native_specific_option = true;
      consumeToken(config, ++i, ':');
      ++i;
      TORCH_CHECK(
          i < config.size() &&
              (std::string_view(config[i]) == "True" ||
               std::string_view(config[i]) == "False"),
          "Expected a single True/False argument for expandable_segments");
      config_item_view = config[i];
      m_expandable_segments = (config_item_view == "True");
    } else if (
        // ROCm build's hipify step will change "cuda" to "hip", but for ease of
        // use, accept both. We must break up the string to prevent hipify here.
        config_item_view == "release_lock_on_hipmalloc" ||
        config_item_view ==
            "release_lock_on_c"
            "udamalloc") {
      used_native_specific_option = true;
      consumeToken(config, ++i, ':');
      ++i;
      TORCH_CHECK(
          i < config.size() &&
              (std::string_view(config[i]) == "True" ||
               std::string_view(config[i]) == "False"),
          "Expected a single True/False argument for release_lock_on_cudamalloc");
      config_item_view = config[i];
      m_release_lock_on_cudamalloc = (config_item_view == "True");
    } else if (
        // ROCm build's hipify step will change "cuda" to "hip", but for ease of
        // use, accept both. We must break up the string to prevent hipify here.
        config_item_view == "pinned_use_hip_host_register" ||
        config_item_view ==
            "pinned_use_c"
            "uda_host_register") {
      i = parsePinnedUseCudaHostRegister(config, i);
      used_native_specific_option = true;
    } else if (config_item_view == "pinned_num_register_threads") {
      i = parsePinnedNumRegisterThreads(config, i);
      used_native_specific_option = true;
    } else {
      TORCH_CHECK(
          false, "Unrecognized CachingAllocator option: ", config_item_view);
    }

    // Options are comma-separated; consume the separator if more follow.
    if (i + 1 < config.size()) {
      consumeToken(config, ++i, ',');
    }
  }

  if (used_cudaMallocAsync && used_native_specific_option) {
    TORCH_WARN(
        "backend:cudaMallocAsync ignores max_split_size_mb,"
        "roundup_power2_divisions, and garbage_collect_threshold.");
  }
}
325*da0073e9SAndroid Build Coastguard Worker
parsePinnedUseCudaHostRegister(const std::vector<std::string> & config,size_t i)326*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::parsePinnedUseCudaHostRegister(
327*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
328*da0073e9SAndroid Build Coastguard Worker size_t i) {
329*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
330*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
331*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
332*da0073e9SAndroid Build Coastguard Worker (config[i] == "True" || config[i] == "False"),
333*da0073e9SAndroid Build Coastguard Worker "Expected a single True/False argument for pinned_use_cuda_host_register");
334*da0073e9SAndroid Build Coastguard Worker m_pinned_use_cuda_host_register = (config[i] == "True");
335*da0073e9SAndroid Build Coastguard Worker } else {
336*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
337*da0073e9SAndroid Build Coastguard Worker false, "Error, expecting pinned_use_cuda_host_register value", "");
338*da0073e9SAndroid Build Coastguard Worker }
339*da0073e9SAndroid Build Coastguard Worker return i;
340*da0073e9SAndroid Build Coastguard Worker }
341*da0073e9SAndroid Build Coastguard Worker
parsePinnedNumRegisterThreads(const std::vector<std::string> & config,size_t i)342*da0073e9SAndroid Build Coastguard Worker size_t CUDAAllocatorConfig::parsePinnedNumRegisterThreads(
343*da0073e9SAndroid Build Coastguard Worker const std::vector<std::string>& config,
344*da0073e9SAndroid Build Coastguard Worker size_t i) {
345*da0073e9SAndroid Build Coastguard Worker consumeToken(config, ++i, ':');
346*da0073e9SAndroid Build Coastguard Worker if (++i < config.size()) {
347*da0073e9SAndroid Build Coastguard Worker size_t val2 = stoi(config[i]);
348*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
349*da0073e9SAndroid Build Coastguard Worker llvm::isPowerOf2_64(val2),
350*da0073e9SAndroid Build Coastguard Worker "Number of register threads has to be power of 2 ",
351*da0073e9SAndroid Build Coastguard Worker "");
352*da0073e9SAndroid Build Coastguard Worker auto maxThreads = CUDAAllocatorConfig::pinned_max_register_threads();
353*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
354*da0073e9SAndroid Build Coastguard Worker val2 <= maxThreads,
355*da0073e9SAndroid Build Coastguard Worker "Number of register threads should be less than or equal to " +
356*da0073e9SAndroid Build Coastguard Worker std::to_string(maxThreads),
357*da0073e9SAndroid Build Coastguard Worker "");
358*da0073e9SAndroid Build Coastguard Worker m_pinned_num_register_threads = val2;
359*da0073e9SAndroid Build Coastguard Worker } else {
360*da0073e9SAndroid Build Coastguard Worker TORCH_CHECK(
361*da0073e9SAndroid Build Coastguard Worker false, "Error, expecting pinned_num_register_threads value", "");
362*da0073e9SAndroid Build Coastguard Worker }
363*da0073e9SAndroid Build Coastguard Worker return i;
364*da0073e9SAndroid Build Coastguard Worker }
365*da0073e9SAndroid Build Coastguard Worker
366*da0073e9SAndroid Build Coastguard Worker // General caching allocator utilities
// Apply a caching-allocator configuration string to the process-wide
// CUDAAllocatorConfig singleton by forwarding it to parseArgs().
void setAllocatorSettings(const std::string& env) {
  CUDACachingAllocator::CUDAAllocatorConfig::instance().parseArgs(env.c_str());
}
370*da0073e9SAndroid Build Coastguard Worker
371*da0073e9SAndroid Build Coastguard Worker } // namespace c10::cuda::CUDACachingAllocator
372