Home
last modified time | relevance | path

Searched refs:compute_capability (Results 1 – 19 of 19) sorted by relevance

/aosp_15_r20/external/tensorflow/tensorflow/core/profiler/utils/
H A Dhardware_type_utils.cc35 switch (device_cap.compute_capability().major()) { in GetFmaMaxThroughputPerSMPerCycle()
50 if (device_cap.compute_capability().minor() > 0) { in GetFmaMaxThroughputPerSMPerCycle()
65 if (device_cap.compute_capability().minor() >= 6) { in GetFmaMaxThroughputPerSMPerCycle()
85 if (device_cap.compute_capability().major() <= 9) { in GetFmaMaxThroughputPerSMPerCycle()
108 switch (device_cap.compute_capability().major()) { in GpuModelName()
118 if (device_cap.compute_capability().minor() < 5) { in GpuModelName()
129 switch (device_cap.compute_capability().major()) { in GpuModelName()
H A Ddevice_caps_utils.cc45 caps.compute_capability().major()); in SetDeviceCaps()
48 caps.compute_capability().minor()); in SetDeviceCaps()
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/
H A Dgpu_backend_lib.cc89 static std::string GetSmName(se::CudaComputeCapability compute_capability) { in GetSmName() argument
91 compute_capability.major * 10 + compute_capability.minor; in GetSmName()
111 << compute_capability.ToString() in GetSmName()
360 llvm::Triple target_triple, se::CudaComputeCapability compute_capability, in NVPTXGetTargetMachine() argument
364 return GetTargetMachine(target_triple, GetSmName(compute_capability), in NVPTXGetTargetMachine()
535 auto compute_capability = in CompileToPtx() local
537 if (!compute_capability) { in CompileToPtx()
545 default_target_triple, *compute_capability, hlo_module_config); in CompileToPtx()
793 auto compute_capability = in AMDGPUTargetModuleLinker() local
795 if (!compute_capability) { in AMDGPUTargetModuleLinker()
[all …]
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/
H A Dcudnn_support_utils.cc28 const se::CudaComputeCapability& compute_capability, in CudnnSupportsOptimizedIntegerConvolution() argument
45 if ((vector_size == 32 && !compute_capability.IsAtLeast(7, 5)) || in CudnnSupportsOptimizedIntegerConvolution()
46 !compute_capability.IsAtLeast(6, 1)) { in CudnnSupportsOptimizedIntegerConvolution()
47 VLOG(3) << "Compute capability " << compute_capability.ToString() in CudnnSupportsOptimizedIntegerConvolution()
H A Dcudnn_vectorize_convolutions.h49 se::CudaComputeCapability compute_capability) in CudnnVectorizeConvolutions() argument
50 : compute_capability_(compute_capability) {} in CudnnVectorizeConvolutions()
H A Dcudnn_pad_for_convolutions.h34 explicit CudnnPadForConvolutions(se::CudaComputeCapability compute_capability) in CudnnPadForConvolutions() argument
35 : compute_capability_(compute_capability) {} in CudnnPadForConvolutions()
H A Dcudnn_vectorize_convolutions.cc261 const se::CudaComputeCapability& compute_capability, in TryRevectorizeConv() argument
297 compute_capability, *conv, vect_size)); in TryRevectorizeConv()
399 const se::CudaComputeCapability& compute_capability, in TryVectorizeConv() argument
424 compute_capability, *conv, vect_size)); in TryVectorizeConv()
H A Dcudnn_simplify_padding_test.cc46 StatusOr<bool> RunEndToEnd(std::pair<int, int> compute_capability, in RunEndToEnd() argument
48 se::CudaComputeCapability cc{compute_capability.first, in RunEndToEnd()
49 compute_capability.second}; in RunEndToEnd()
H A Dcudnn_vectorize_convolutions_test.cc37 StatusOr<bool> Run(std::pair<int, int> compute_capability, in Run() argument
40 compute_capability.first, compute_capability.second}); in Run()
H A Dcudnn_support_utils.h35 const se::CudaComputeCapability& compute_capability,
H A Dcudnn_pad_for_convolutions.cc295 int pad_to, const se::CudaComputeCapability& compute_capability, in TryResolvePaddedShapesForIntegerConvolution() argument
351 compute_capability, *conv, pad_to)); in TryResolvePaddedShapesForIntegerConvolution()
/aosp_15_r20/external/pytorch/c10/cuda/
H A DCUDADeviceAssertionHost.cpp50 int compute_capability = -1; in dsa_get_device_compute_capability() local
52 &compute_capability, cudaDevAttrComputeCapabilityMajor, device_num)); in dsa_get_device_compute_capability()
53 return compute_capability; in dsa_get_device_compute_capability()
/aosp_15_r20/external/tensorflow/tensorflow/core/grappler/optimizers/
H A Dgeneric_layout_optimizer.cc63 double compute_capability = 0.0; in GetNumGPUs() local
64 if (absl::SimpleAtod(compute_capability_it->second, &compute_capability) && in GetNumGPUs()
65 compute_capability >= 7.0) { in GetNumGPUs()
H A Dremapper.cc423 double compute_capability = 0.0; in RuntimeFusionEnabled() local
424 if (absl::SimpleAtod(cc_it->second, &compute_capability) && in RuntimeFusionEnabled()
425 compute_capability >= 8.0) { in RuntimeFusionEnabled()
/aosp_15_r20/external/tensorflow/
H A Dconfigure.py912 for compute_capability in tf_cuda_compute_capabilities.split(','):
913 m = re.match('[0-9]+.[0-9]+', compute_capability)
917 compute_capability)
919 print('Invalid compute capability: %s' % compute_capability)
/aosp_15_r20/external/tensorflow/tensorflow/core/profiler/protobuf/
H A Dhardware_types.proto29 GPUComputeCapability compute_capability = 5; field
/aosp_15_r20/external/tensorflow/tensorflow/core/protobuf/
H A Dautotuning.proto98 ComputeCapability compute_capability = 4; field
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/
H A Ddepthwise_conv_op_gpu.h677 se::CudaComputeCapability compute_capability =
680 return compute_capability.IsAtLeast(5, 3) &&
681 compute_capability != se::CudaComputeCapability{6, 1};
/aosp_15_r20/external/tensorflow/tensorflow/python/framework/
H A Dtest_util.py1889 cc = gpu_info.compute_capability or (0, 0)