/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
H A D | gpu_conv_runner.cc | 75 DeviceMemoryBase scratch_memory) { in RunGpuConvUnfused() argument 111 return (*runner)(stream, options.profile_result, scratch_memory, input_buf, in RunGpuConvUnfused() 121 DeviceMemoryBase scratch_memory) { in RunGpuConvForwardActivation() argument 173 return (*runner)(stream, options.profile_result, scratch_memory, input_buf, in RunGpuConvForwardActivation() 193 DeviceMemoryBase scratch_memory) { in RunGpuConvInternalImpl() argument 199 output_buf, scratch_memory); in RunGpuConvInternalImpl() 203 scratch_memory); in RunGpuConvInternalImpl() 218 DeviceMemoryBase scratch_memory) { in RunGpuConvInternalImpl() argument 222 output_buf, scratch_memory); in RunGpuConvInternalImpl() 226 scratch_memory); in RunGpuConvInternalImpl() [all …]
|
H A D | gpu_conv_algorithm_picker.cc | 511 se::DeviceMemoryBase scratch_memory = scratch_or.ValueOrDie(); in AutotuneOneConvRunner() local 518 scratch_memory, stream, options); in AutotuneOneConvRunner() 865 DeviceMemoryBase scratch_memory, in PickBestAlgorithmNoCacheRocm() 880 scratch_memory, stream, options); in PickBestAlgorithmNoCacheRocm()
|
H A D | gpu_conv_runner.h | 181 se::DeviceMemoryBase scratch_memory, se::Stream* stream,
|
/aosp_15_r20/external/webrtc/modules/audio_processing/aec3/ |
H A D | matched_filter.cc | 164 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCoreWithAccumulatedError_NEON() argument 179 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCoreWithAccumulatedError_NEON() 180 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCoreWithAccumulatedError_NEON() 183 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCoreWithAccumulatedError_NEON() 218 x_p = chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCoreWithAccumulatedError_NEON() 246 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_NEON() argument 254 error_sum, accumulated_error, scratch_memory); in MatchedFilterCore_NEON() 363 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AccumulatedError_SSE2() argument 376 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCore_AccumulatedError_SSE2() 377 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCore_AccumulatedError_SSE2() [all …]
|
H A D | matched_filter_avx2.cc | 41 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AccumulatedError_AVX2() argument 55 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCore_AccumulatedError_AVX2() 56 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCore_AccumulatedError_AVX2() 59 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCore_AccumulatedError_AVX2() 119 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCore_AccumulatedError_AVX2() 149 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AVX2() argument 153 error_sum, accumulated_error, scratch_memory); in MatchedFilterCore_AVX2()
|
H A D | matched_filter.h | 44 rtc::ArrayView<float> scratch_memory); 61 rtc::ArrayView<float> scratch_memory); 74 rtc::ArrayView<float> scratch_memory);
|
H A D | matched_filter_unittest.cc | 70 std::vector<float> scratch_memory(512); in TEST_P() local 84 scratch_memory); in TEST_P() 132 std::vector<float> scratch_memory(512); in TEST_P() local 145 accumulated_error_SSE2, scratch_memory); in TEST_P() 188 std::vector<float> scratch_memory(512); in TEST_P() local 199 accumulated_error_AVX2, scratch_memory); in TEST_P()
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/ |
H A D | dnn.h | 892 DeviceMemoryBase scratch_memory, 1331 DeviceMemory<uint8>* scratch_memory) { in PrepareForConvolution() argument 1336 scratch_allocator, algorithm_desc, scratch_memory); in PrepareForConvolution() 1380 AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory, 2315 DeviceMemory<uint8>* scratch_memory, in PrepareForCtcLoss() argument 2320 workspace_allocator, scratch_memory, ctc_loss_algo_id); in PrepareForCtcLoss() 2351 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id); 2363 DeviceMemory<uint8>* scratch_memory, int ctc_loss_algo_id) { in DoCtcLoss() argument 2368 *scratch_memory, ctc_loss_algo_id), in DoCtcLoss() 2627 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument [all …]
|
H A D | stream.h | 347 DeviceMemory<uint8> scratch_memory; in ConvolveWithAlgorithm() local 354 &scratch_memory)); in ConvolveWithAlgorithm() 360 scratch_memory, output_profile_result); in ConvolveWithAlgorithm()
|
H A D | stream.cc | 2284 DeviceMemory<uint8> scratch_memory; in ThenCtcLoss() local 2290 &scratch_memory, &ctc_loss_algo_id) in ThenCtcLoss() 2296 &scratch_memory, ctc_loss_algo_id); in ThenCtcLoss()
|
H A D | dnn.cc | 882 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLoss() argument
|
/aosp_15_r20/external/tensorflow/tensorflow/stream_executor/rocm/ |
H A D | rocm_dnn.cc | 2451 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory, in DoPrepareForCtcLoss() argument 2479 *scratch_memory = DeviceMemory<uint8>(); in DoPrepareForCtcLoss() 2490 *scratch_memory = scratch_or.ValueOrDie(); in DoPrepareForCtcLoss() 2515 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLossImpl() argument 2529 scratch_memory.opaque(), scratch_memory.size()); in DoCtcLossImpl() 2545 DeviceMemoryBase grads_data, DeviceMemory<uint8> scratch_memory, in DoCtcLoss() argument 2565 scratch_memory, ctc_loss_algo_id); in DoCtcLoss() 2912 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument 2934 *scratch_memory = allocated.ValueOrDie(); in DoPrepareForConvolution() 2982 DeviceMemoryBase scratch_memory, in operator ()() argument [all …]
|
H A D | rocm_dnn.h | 348 dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory, 620 DeviceMemory<uint8> scratch_memory, 771 DeviceMemory<uint8>* scratch_memory) override; 780 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id); 789 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory,
|
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/ |
H A D | conv_ops_gpu.h | 140 se::DeviceMemoryBase scratch_memory; in AllocateScratchOrFallback() local 144 scratch_memory = scratch_or.ValueOrDie(); in AllocateScratchOrFallback() 157 return std::make_tuple(selected_runner, scratch_memory); in AllocateScratchOrFallback()
|
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/ |
H A D | cuda_dnn.cc | 2380 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLossImpl() argument 2401 /*workspace=*/scratch_memory.opaque(), in DoCtcLossImpl() 2402 /*workSpaceSizeInBytes=*/scratch_memory.size())); in DoCtcLossImpl() 3965 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument 3984 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution() 3992 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution() 4000 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution() 4081 DeviceMemoryBase scratch_memory, in operator ()() argument 4155 /*workSpace=*/scratch_memory.opaque(), in operator ()() 4156 /*workSpaceSizeInBytes=*/scratch_memory.size(), /*beta=*/beta, in operator ()() [all …]
|
H A D | cuda_dnn.h | 344 dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory, 518 DeviceMemory<uint8> scratch_memory, 620 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id); 633 DeviceMemory<uint8>* scratch_memory) override; 642 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory,
|