1 #define TORCH_ASSERT_NO_OPERATORS
2 #include <ATen/native/cuda/Reduce.cuh>
3 #include <c10/util/ArrayRef.h>
4
5 #include <iostream>
6
7
8 namespace at::native {
9
operator <<(std::ostream & out,dim3 dim)10 static inline std::ostream& operator<<(std::ostream& out, dim3 dim) {
11 if (dim.y == 1 && dim.z == 1) {
12 out << dim.x;
13 } else {
14 out << "[" << dim.x << "," << dim.y << "," << dim.z << "]";
15 }
16 return out;
17 }
18
operator <<(std::ostream & out,const ReduceConfig & config)19 std::ostream& operator<<(std::ostream& out, const ReduceConfig& config) {
20 out << "ReduceConfig(";
21 out << "element_size_bytes=" << config.element_size_bytes << ", ";
22 out << "num_inputs=" << config.num_inputs << ", ";
23 out << "num_outputs=" << config.num_outputs << ", ";
24 out << "step_input=" << config.step_input << ", ";
25 out << "step_output=" << config.step_output << ", ";
26 out << "ctas_per_output=" << config.ctas_per_output << ", ";
27 out << "input_mult=[";
28 for (int i = 0; i < 3; i++) {
29 if (i != 0) {
30 out << ",";
31 }
32 out << config.input_mult[i];
33 }
34 out << "], ";
35 out << "output_mult=[";
36 for (int i = 0; i < 2; i++) {
37 if (i != 0) {
38 out << ",";
39 }
40 out << config.output_mult[i];
41 }
42 out << "], ";
43 out << "vectorize_input=" << config.vectorize_input << ", ";
44 out << "output_vec_size=" << config.output_vec_size << ", ";
45 out << "block_width=" << config.block_width << ", ";
46 out << "block_height=" << config.block_height << ", ";
47 out << "num_threads=" << config.num_threads << ", ";
48 out << "values_per_thread=" << config.values_per_thread() << ", ";
49 out << "block=" << config.block() << ", ";
50 out << "grid=" << config.grid() << ", ";
51 out << "global_memory_size=" << config.global_memory_size();
52 out << ")";
53 return out;
54 }
55
56 } // namespace at::native
57