Home
last modified time | relevance | path

Searched refs:scratch_memory (Results 1 – 16 of 16) sorted by relevance

/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/
H A Dgpu_conv_runner.cc75 DeviceMemoryBase scratch_memory) { in RunGpuConvUnfused() argument
111 return (*runner)(stream, options.profile_result, scratch_memory, input_buf, in RunGpuConvUnfused()
121 DeviceMemoryBase scratch_memory) { in RunGpuConvForwardActivation() argument
173 return (*runner)(stream, options.profile_result, scratch_memory, input_buf, in RunGpuConvForwardActivation()
193 DeviceMemoryBase scratch_memory) { in RunGpuConvInternalImpl() argument
199 output_buf, scratch_memory); in RunGpuConvInternalImpl()
203 scratch_memory); in RunGpuConvInternalImpl()
218 DeviceMemoryBase scratch_memory) { in RunGpuConvInternalImpl() argument
222 output_buf, scratch_memory); in RunGpuConvInternalImpl()
226 scratch_memory); in RunGpuConvInternalImpl()
[all …]
H A Dgpu_conv_algorithm_picker.cc511 se::DeviceMemoryBase scratch_memory = scratch_or.ValueOrDie(); in AutotuneOneConvRunner() local
518 scratch_memory, stream, options); in AutotuneOneConvRunner()
865 DeviceMemoryBase scratch_memory, in PickBestAlgorithmNoCacheRocm()
880 scratch_memory, stream, options); in PickBestAlgorithmNoCacheRocm()
H A Dgpu_conv_runner.h181 se::DeviceMemoryBase scratch_memory, se::Stream* stream,
/aosp_15_r20/external/webrtc/modules/audio_processing/aec3/
H A Dmatched_filter.cc164 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCoreWithAccumulatedError_NEON() argument
179 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCoreWithAccumulatedError_NEON()
180 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCoreWithAccumulatedError_NEON()
183 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCoreWithAccumulatedError_NEON()
218 x_p = chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCoreWithAccumulatedError_NEON()
246 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_NEON() argument
254 error_sum, accumulated_error, scratch_memory); in MatchedFilterCore_NEON()
363 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AccumulatedError_SSE2() argument
376 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCore_AccumulatedError_SSE2()
377 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCore_AccumulatedError_SSE2()
[all …]
H A Dmatched_filter_avx2.cc41 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AccumulatedError_AVX2() argument
55 std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); in MatchedFilterCore_AccumulatedError_AVX2()
56 std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); in MatchedFilterCore_AccumulatedError_AVX2()
59 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCore_AccumulatedError_AVX2()
119 chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; in MatchedFilterCore_AccumulatedError_AVX2()
149 rtc::ArrayView<float> scratch_memory) { in MatchedFilterCore_AVX2() argument
153 error_sum, accumulated_error, scratch_memory); in MatchedFilterCore_AVX2()
H A Dmatched_filter.h44 rtc::ArrayView<float> scratch_memory);
61 rtc::ArrayView<float> scratch_memory);
74 rtc::ArrayView<float> scratch_memory);
H A Dmatched_filter_unittest.cc70 std::vector<float> scratch_memory(512); in TEST_P() local
84 scratch_memory); in TEST_P()
132 std::vector<float> scratch_memory(512); in TEST_P() local
145 accumulated_error_SSE2, scratch_memory); in TEST_P()
188 std::vector<float> scratch_memory(512); in TEST_P() local
199 accumulated_error_AVX2, scratch_memory); in TEST_P()
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/
H A Ddnn.h892 DeviceMemoryBase scratch_memory,
1331 DeviceMemory<uint8>* scratch_memory) { in PrepareForConvolution() argument
1336 scratch_allocator, algorithm_desc, scratch_memory); in PrepareForConvolution()
1380 AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
2315 DeviceMemory<uint8>* scratch_memory, in PrepareForCtcLoss() argument
2320 workspace_allocator, scratch_memory, ctc_loss_algo_id); in PrepareForCtcLoss()
2351 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id);
2363 DeviceMemory<uint8>* scratch_memory, int ctc_loss_algo_id) { in DoCtcLoss() argument
2368 *scratch_memory, ctc_loss_algo_id), in DoCtcLoss()
2627 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument
[all …]
H A Dstream.h347 DeviceMemory<uint8> scratch_memory; in ConvolveWithAlgorithm() local
354 &scratch_memory)); in ConvolveWithAlgorithm()
360 scratch_memory, output_profile_result); in ConvolveWithAlgorithm()
H A Dstream.cc2284 DeviceMemory<uint8> scratch_memory; in ThenCtcLoss() local
2290 &scratch_memory, &ctc_loss_algo_id) in ThenCtcLoss()
2296 &scratch_memory, ctc_loss_algo_id); in ThenCtcLoss()
H A Ddnn.cc882 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLoss() argument
/aosp_15_r20/external/tensorflow/tensorflow/stream_executor/rocm/
H A Drocm_dnn.cc2451 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory, in DoPrepareForCtcLoss() argument
2479 *scratch_memory = DeviceMemory<uint8>(); in DoPrepareForCtcLoss()
2490 *scratch_memory = scratch_or.ValueOrDie(); in DoPrepareForCtcLoss()
2515 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLossImpl() argument
2529 scratch_memory.opaque(), scratch_memory.size()); in DoCtcLossImpl()
2545 DeviceMemoryBase grads_data, DeviceMemory<uint8> scratch_memory, in DoCtcLoss() argument
2565 scratch_memory, ctc_loss_algo_id); in DoCtcLoss()
2912 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument
2934 *scratch_memory = allocated.ValueOrDie(); in DoPrepareForConvolution()
2982 DeviceMemoryBase scratch_memory, in operator ()() argument
[all …]
H A Drocm_dnn.h348 dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
620 DeviceMemory<uint8> scratch_memory,
771 DeviceMemory<uint8>* scratch_memory) override;
780 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id);
789 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory,
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/
H A Dconv_ops_gpu.h140 se::DeviceMemoryBase scratch_memory; in AllocateScratchOrFallback() local
144 scratch_memory = scratch_or.ValueOrDie(); in AllocateScratchOrFallback()
157 return std::make_tuple(selected_runner, scratch_memory); in AllocateScratchOrFallback()
/aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/cuda/
H A Dcuda_dnn.cc2380 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id) { in DoCtcLossImpl() argument
2401 /*workspace=*/scratch_memory.opaque(), in DoCtcLossImpl()
2402 /*workSpaceSizeInBytes=*/scratch_memory.size())); in DoCtcLossImpl()
3965 DeviceMemory<uint8>* scratch_memory) { in DoPrepareForConvolution() argument
3984 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution()
3992 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution()
4000 output_nd, scratch_allocator, scratch_memory)); in DoPrepareForConvolution()
4081 DeviceMemoryBase scratch_memory, in operator ()() argument
4155 /*workSpace=*/scratch_memory.opaque(), in operator ()()
4156 /*workSpaceSizeInBytes=*/scratch_memory.size(), /*beta=*/beta, in operator ()()
[all …]
H A Dcuda_dnn.h344 dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
518 DeviceMemory<uint8> scratch_memory,
620 DeviceMemory<uint8> scratch_memory, int ctc_loss_algo_id);
633 DeviceMemory<uint8>* scratch_memory) override;
642 ScratchAllocator* scratch_allocator, DeviceMemory<uint8>* scratch_memory,