xref: /aosp_15_r20/external/pytorch/aten/src/ATen/native/Histogram.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
2 #include <ATen/core/Tensor.h>
3 #include <ATen/Dispatch.h>
4 
5 #include <ATen/native/Histogram.h>
6 #include <ATen/native/Resize.h>
7 
8 #ifndef AT_PER_OPERATOR_HEADERS
9 #include <ATen/Functions.h>
10 #include <ATen/NativeFunctions.h>
11 #else
12 #include <ATen/ops/_histogramdd_bin_edges.h>
13 #include <ATen/ops/_histogramdd_bin_edges_native.h>
14 #include <ATen/ops/_histogramdd_from_bin_cts.h>
15 #include <ATen/ops/_histogramdd_from_bin_cts_native.h>
16 #include <ATen/ops/_histogramdd_from_bin_tensors.h>
17 #include <ATen/ops/_histogramdd_from_bin_tensors_native.h>
18 #include <ATen/ops/aminmax.h>
19 #include <ATen/ops/empty.h>
20 #include <ATen/ops/histc_native.h>
21 #include <ATen/ops/histogram_native.h>
22 #include <ATen/ops/histogramdd_native.h>
23 #include <ATen/ops/linspace.h>
24 #endif
25 
26 #include <numeric>
27 #include <tuple>
28 #include <vector>
29 #include <functional>
30 #include <c10/util/ArrayRef.h>
31 #include <c10/core/ScalarType.h>
32 #include <c10/core/DefaultDtype.h>
33 #include <c10/util/irange.h>
34 
35 /* Implements a numpy-like histogramdd function running on cpu
36  * https://numpy.org/doc/stable/reference/generated/numpy.histogramdd.html
37  *
38  * See the docstr for torch.histogramdd in torch/functional.py for further explanation.
39  *
40  * - torch.histogramdd(input, bins, range=None, weight=None, density=False)
41  *   input     - tensor with shape (M, N). input is interpreted as M coordinates in N-dimensional space.
42  *               If a tensor with more than 2 dimensions is passed, all but the last dimension will be flattened.
43  *   bins      - int[] of length N or tensor list of length N. If int[], defines the number of equal-width bins
44  *               in each dimension. If tensor list, defines the sequences of bin edges, including rightmost edges,
45  *               for each dimension.
46  *   range     - float[] of length 2 * N, optional. If specified, defines the leftmost and rightmost bin edges
47  *               for each dimension.
48  *   weight    - tensor, optional. If provided, weight should have the same shape as input excluding its last dimension.
49  *               Each N-dimensional value in input contributes its associated weight towards its bin's result.
50  *               If weight is not specified, each value has weight 1 by default.
51  *   density   - bool, optional. If false (default), the result will contain the total count (weight) in each bin.
52  *               If True, each count (weight) is divided by the total count (total weight), then divided by the
53  *               volume of its associated bin.
54  *
55  * Returns:
56  *   hist      - N-dimensional tensor containing the values of the histogram.
57  *   bin_edges - tensor list of length N containing the edges of the histogram bins in each dimension.
58  *               Bins include their left edge and exclude their right edge, with the exception of the
59  *               rightmost bin in each dimension which includes both of its edges.
60  *
61  * Restrictions are defined in histogram_check_inputs() and in select_outer_bin_edges().
62  */
63 
64 namespace at::native {
65 
66 DEFINE_DISPATCH(histogramdd_stub);
67 DEFINE_DISPATCH(histogramdd_linear_stub);
68 DEFINE_DISPATCH(histogram_select_outer_bin_edges_stub);
69 
70 namespace {
71 
72 /* Checks properties of input tensors input, bins, and weight.
73  */
histogramdd_check_inputs(const Tensor & input,const TensorList & bins,const std::optional<Tensor> & weight)74 void histogramdd_check_inputs(const Tensor& input, const TensorList& bins, const std::optional<Tensor>& weight) {
75     TORCH_CHECK(input.dim() >= 2, "torch.histogramdd: input tensor should have at least 2 dimensions, but got ",
76                 input.dim());
77 
78     const int64_t N = input.size(-1);
79 
80     TORCH_CHECK(static_cast<int64_t>(bins.size()) == N, "torch.histogramdd: expected ", N, " sequences of bin edges for a ", N,
81                 "-dimensional histogram but got ", bins.size());
82 
83     auto input_dtype = input.dtype();
84     for (const auto dim : c10::irange(N)) {
85         const Tensor& dim_bins = bins[dim];
86 
87         auto bins_dtype = dim_bins.dtype();
88         TORCH_CHECK(input_dtype == bins_dtype, "torch.histogramdd: input tensor and bins tensors should",
89                 " have the same dtype, but got input with dtype ", input_dtype,
90                 " and bins for dimension ", dim, " with dtype ", bins_dtype);
91 
92         const int64_t dim_bins_dim = dim_bins.dim();
93         TORCH_CHECK(dim_bins_dim == 1, "torch.histogramdd: bins tensor should have one dimension,",
94                 " but got ", dim_bins_dim, " dimensions in the bins tensor for dimension ", dim);
95 
96         const int64_t numel = dim_bins.numel();
97         TORCH_CHECK(numel > 0, "torch.histogramdd: bins tensor should have at least 1 element,",
98                 " but got ", numel, " elements in the bins tensor for dimension ", dim);
99     }
100 
101     if (weight.has_value()) {
102         TORCH_CHECK(input.dtype() == weight.value().dtype(), "torch.histogramdd: if weight tensor is provided,"
103                 " input tensor and weight tensor should have the same dtype, but got input(", input.dtype(), ")",
104                 ", and weight(", weight.value().dtype(), ")");
105 
106         /* If a weight tensor is provided, we expect its shape to match that of
107          * the input tensor excluding its innermost dimension N.
108          */
109         auto input_sizes = input.sizes().vec();
110         input_sizes.pop_back();
111 
112         auto weight_sizes = weight.value().sizes().vec();
113         if (weight_sizes.empty()) {
114             // correctly handle scalars
115             weight_sizes = {1};
116         }
117 
118         TORCH_CHECK(input_sizes == weight_sizes, "torch.histogramdd: if weight tensor is provided it should have"
119                 " the same shape as the input tensor excluding its innermost dimension, but got input with shape ",
120                 input.sizes(), " and weight with shape ", weight.value().sizes());
121     }
122 }
123 
124 /* Checks properties of output tensors hist and bin_edges, then resizes them.
125  */
histogramdd_prepare_out(const Tensor & input,const std::vector<int64_t> & bin_ct,const Tensor & hist,const TensorList & bin_edges)126 void histogramdd_prepare_out(const Tensor& input, const std::vector<int64_t>& bin_ct,
127         const Tensor& hist, const TensorList& bin_edges) {
128     const int64_t N = input.size(-1);
129 
130     TORCH_INTERNAL_ASSERT((int64_t)bin_ct.size() == N);
131     TORCH_INTERNAL_ASSERT((int64_t)bin_edges.size() == N);
132 
133     TORCH_CHECK(input.dtype() == hist.dtype(), "torch.histogram: input tensor and hist tensor should",
134             " have the same dtype, but got input ", input.dtype(), " and hist ", hist.dtype());
135 
136     for (const auto dim : c10::irange(N)) {
137         TORCH_CHECK(input.dtype() == bin_edges[dim].dtype(), "torch.histogram: input tensor and bin_edges tensor should",
138                 " have the same dtype, but got input ", input.dtype(), " and bin_edges ", bin_edges[dim].dtype(),
139                 " for dimension ", dim);
140 
141         TORCH_CHECK(bin_ct[dim] > 0,
142                 "torch.histogram(): bins must be > 0, but got ", bin_ct[dim], " for dimension ", dim);
143 
144         at::native::resize_output(bin_edges[dim], bin_ct[dim] + 1);
145     }
146 
147     at::native::resize_output(hist, bin_ct);
148 }
149 
histogramdd_prepare_out(const Tensor & input,TensorList bins,const Tensor & hist,const TensorList & bin_edges)150 void histogramdd_prepare_out(const Tensor& input, TensorList bins,
151         const Tensor& hist, const TensorList& bin_edges) {
152     std::vector<int64_t> bin_ct(bins.size());
153     std::transform(bins.begin(), bins.end(), bin_ct.begin(), [](Tensor t) { return t.numel() - 1; });
154     histogramdd_prepare_out(input, bin_ct, hist, bin_edges);
155 }
156 
157 /* Determines the outermost bin edges. For simplicity when calling into aminmax,
158  * assumes that input has already been reshaped to (M, N).
159  */
160 std::pair<std::vector<double>, std::vector<double>>
select_outer_bin_edges(const Tensor & input,std::optional<c10::ArrayRef<double>> range)161 select_outer_bin_edges(const Tensor& input, std::optional<c10::ArrayRef<double>> range) {
162     TORCH_INTERNAL_ASSERT(input.dim() == 2, "expected input to have shape (M, N)");
163     const int64_t N = input.size(-1);
164 
165     // Default ranges for empty input matching numpy.histogram's default
166     std::vector<double> leftmost_edges(N, 0.);
167     std::vector<double> rightmost_edges(N, 1.);
168 
169     if (range.has_value()) {
170         // range is specified
171         TORCH_CHECK((int64_t)range.value().size() == 2 * N, "torch.histogramdd: for a ", N, "-dimensional histogram",
172                 " range should have ", 2 * N, " elements, but got ", range.value().size());
173 
174         for (const auto dim : c10::irange(N)) {
175             leftmost_edges[dim] = range.value()[2 * dim];
176             rightmost_edges[dim] = range.value()[2 * dim + 1];
177         }
178     } else if (input.numel() > 0) {
179         // non-empty input
180 
181         histogram_select_outer_bin_edges_stub(input.device().type(), input, N, leftmost_edges, rightmost_edges);
182     }
183 
184     for (const auto dim : c10::irange(N)) {
185         double leftmost_edge = leftmost_edges[dim];
186         double rightmost_edge = rightmost_edges[dim];
187 
188         TORCH_CHECK(std::isfinite(leftmost_edge) && std::isfinite(rightmost_edge),
189                 "torch.histogramdd: dimension ", dim, "'s range [",
190                 leftmost_edge, ", ", rightmost_edge, "] is not finite");
191 
192         TORCH_CHECK(leftmost_edge <= rightmost_edge, "torch.histogramdd: min should not exceed max, but got",
193                 " min ", leftmost_edge, " max ", rightmost_edge, " for dimension ", dim);
194 
195         // Expand empty range to match numpy behavior and avoid division by 0 in normalization
196         if (leftmost_edge == rightmost_edge) {
197             leftmost_edges[dim] -= 0.5;
198             rightmost_edges[dim] += 0.5;
199         }
200     }
201 
202     return std::make_pair(leftmost_edges, rightmost_edges);
203 }
204 
205 /* histc's version of the logic for outermost bin edges.
206  */
histc_select_outer_bin_edges(const Tensor & input,const Scalar & min,const Scalar & max)207 std::pair<double, double> histc_select_outer_bin_edges(const Tensor& input,
208         const Scalar& min, const Scalar& max) {
209     double leftmost_edge = min.to<double>();
210     double rightmost_edge = max.to<double>();
211 
212     if (leftmost_edge == rightmost_edge && input.numel() > 0) {
213         auto extrema = aminmax(input);
214         leftmost_edge = std::get<0>(extrema).item<double>();
215         rightmost_edge = std::get<1>(extrema).item<double>();
216     }
217 
218     if (leftmost_edge == rightmost_edge) {
219         leftmost_edge -= 1;
220         rightmost_edge += 1;
221     }
222 
223     TORCH_CHECK(!(std::isinf(leftmost_edge) || std::isinf(rightmost_edge) ||
224             std::isnan(leftmost_edge) || std::isnan(rightmost_edge)),
225             "torch.histc: range of [", leftmost_edge, ", ", rightmost_edge, "] is not finite");
226 
227     TORCH_CHECK(leftmost_edge < rightmost_edge, "torch.histc: max must be larger than min");
228 
229     return std::make_pair(leftmost_edge, rightmost_edge);
230 }
231 
232 } // namespace
233 
allocate_bin_edges_tensors(const Tensor & self)234 static std::vector<Tensor> allocate_bin_edges_tensors(const Tensor& self) {
235     TORCH_CHECK(self.dim() >= 2, "torch.histogramdd: input tensor should have at least 2 dimensions");
236     const int64_t N = self.size(-1);
237     std::vector<Tensor> bin_edges_out(N);
238     for (const auto dim : c10::irange(N)) {
239         bin_edges_out[dim] = at::empty({0}, self.options(), MemoryFormat::Contiguous);
240     }
241     return bin_edges_out;
242 }
243 
244 /* Versions of histogramdd in which bins is a Tensor[] defining the sequences of bin edges.
245  */
histogramdd_out(const Tensor & self,TensorList bins,const std::optional<Tensor> & weight,bool density,Tensor & hist,TensorList & bin_edges)246 static Tensor& histogramdd_out(const Tensor& self, TensorList bins,
247         const std::optional<Tensor>& weight, bool density,
248         Tensor& hist, TensorList& bin_edges) {
249     histogramdd_check_inputs(self, bins, weight);
250     histogramdd_prepare_out(self, bins, hist, bin_edges);
251 
252     for (const auto dim : c10::irange(bins.size())) {
253         bin_edges[dim].copy_(bins[dim]);
254     }
255 
256     histogramdd_stub(self.device().type(), self, weight, density, hist, bin_edges);
257     return hist;
258 }
259 
_histogramdd(const Tensor & self,TensorList bins,const std::optional<Tensor> & weight,bool density)260 Tensor _histogramdd(const Tensor& self, TensorList bins,
261         const std::optional<Tensor>& weight, bool density) {
262     Tensor hist = at::empty({0}, self.options(), MemoryFormat::Contiguous);
263     std::vector<Tensor> bin_edges_out = allocate_bin_edges_tensors(self);
264     TensorList bin_edges_out_tl(bin_edges_out);
265 
266     histogramdd_out(self, bins, weight, density, hist, bin_edges_out_tl);
267     return hist;
268 }
269 
270 /* Versions of histogramdd in which bins is an int[]
271  * defining the number of bins in each dimension.
272  */
histogramdd_bin_edges_out(const Tensor & self,IntArrayRef bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density,std::vector<Tensor> & bin_edges_out)273 static std::vector<Tensor>& histogramdd_bin_edges_out(const Tensor& self, IntArrayRef bin_ct,
274         std::optional<c10::ArrayRef<double>> range,
275         const std::optional<Tensor>& weight, bool density,
276         std::vector<Tensor>& bin_edges_out) {
277     TensorList bin_edges_out_tl(bin_edges_out);
278 
279     const int64_t N = self.size(-1);
280     const int64_t M = std::accumulate(self.sizes().begin(), self.sizes().end() - 1,
281             (int64_t)1, std::multiplies<int64_t>());
282     Tensor reshaped_self = self.reshape({ M, N });
283 
284     auto outer_bin_edges = select_outer_bin_edges(reshaped_self, range);
285 
286     const int64_t bin_size = bin_ct.size();
287     TORCH_CHECK(
288         N == bin_size,
289         "histogramdd: The size of bins must be equal to the innermost dimension of the input.");
290     for (const auto dim : c10::irange(N)) {
291         at::linspace_out(bin_edges_out[dim], outer_bin_edges.first[dim], outer_bin_edges.second[dim],
292                 bin_ct[dim] + 1);
293     }
294 
295     return bin_edges_out;
296 }
297 
histogramdd_bin_edges(const Tensor & self,IntArrayRef bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density)298 std::vector<Tensor> histogramdd_bin_edges(const Tensor& self, IntArrayRef bin_ct,
299         std::optional<c10::ArrayRef<double>> range,
300         const std::optional<Tensor>& weight, bool density) {
301     std::vector<Tensor> bin_edges_out = allocate_bin_edges_tensors(self);
302     return histogramdd_bin_edges_out(self, bin_ct, range, weight, density, bin_edges_out);
303 }
304 
histogramdd_out(const Tensor & self,IntArrayRef bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density,Tensor & hist,TensorList & bin_edges)305 static Tensor& histogramdd_out(const Tensor& self, IntArrayRef bin_ct,
306         std::optional<c10::ArrayRef<double>> range,
307         const std::optional<Tensor>& weight, bool density,
308         Tensor& hist, TensorList& bin_edges) {
309     std::vector<Tensor> bins = histogramdd_bin_edges(self, bin_ct, range, weight, density);
310 
311     histogramdd_check_inputs(self, bins, weight);
312     histogramdd_prepare_out(self, bins, hist, bin_edges);
313 
314     for (const auto dim : c10::irange(bins.size())) {
315         bin_edges[dim].copy_(bins[dim]);
316     }
317 
318     histogramdd_linear_stub(self.device().type(), self, weight, density, hist, bin_edges, true);
319     return hist;
320 }
321 
_histogramdd(const Tensor & self,IntArrayRef bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density)322 Tensor _histogramdd(const Tensor& self, IntArrayRef bin_ct,
323         std::optional<c10::ArrayRef<double>> range,
324         const std::optional<Tensor>& weight, bool density) {
325     Tensor hist = at::empty({0}, self.options(), MemoryFormat::Contiguous);
326     std::vector<Tensor> bin_edges_out = allocate_bin_edges_tensors(self);
327     TensorList bin_edges_out_tl(bin_edges_out);
328 
329     histogramdd_out(self, bin_ct, range, weight, density, hist, bin_edges_out_tl);
330     return hist;
331 }
332 
333 /* Versions of histogram in which bins is a Tensor defining the sequence of bin edges.
334  */
335 std::tuple<Tensor&, Tensor&>
histogram_out(const Tensor & self,const Tensor & bins,const std::optional<Tensor> & weight,bool density,Tensor & hist,Tensor & bin_edges)336 histogram_out(const Tensor& self, const Tensor& bins,
337         const std::optional<Tensor>& weight, bool density,
338         Tensor& hist, Tensor& bin_edges) {
339     Tensor reshaped_self = self.reshape({ self.numel(), 1 });
340     std::optional<Tensor> reshaped_weight = weight.has_value()
341         ? weight.value().reshape({ weight.value().numel() }) : weight;
342     TensorList bins_in = bins;
343     TensorList bins_out = bin_edges;
344 
345     histogramdd_out(reshaped_self, bins_in, reshaped_weight, density, hist, bins_out);
346 
347     return std::forward_as_tuple(hist, bin_edges);
348 }
349 
350 std::tuple<Tensor, Tensor>
histogram(const Tensor & self,const Tensor & bins,const std::optional<Tensor> & weight,bool density)351 histogram(const Tensor& self, const Tensor& bins,
352         const std::optional<Tensor>& weight, bool density) {
353     Tensor hist = at::empty({0}, self.options(), MemoryFormat::Contiguous);
354     Tensor bin_edges = at::empty({0}, bins.options(), MemoryFormat::Contiguous);
355     return histogram_out(self, bins, weight, density, hist, bin_edges);
356 }
357 
358 /* Versions of histogram in which bins is an integer specifying the number of equal-width bins.
359  */
360 std::tuple<Tensor&, Tensor&>
histogram_out(const Tensor & self,int64_t bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density,Tensor & hist,Tensor & bin_edges)361 histogram_out(const Tensor& self, int64_t bin_ct, std::optional<c10::ArrayRef<double>> range,
362         const std::optional<Tensor>& weight, bool density,
363         Tensor& hist, Tensor& bin_edges) {
364     Tensor reshaped_self = self.reshape({ self.numel(), 1 });
365     std::optional<Tensor> reshaped_weight = weight.has_value()
366         ? weight.value().reshape({ weight.value().numel() }) : weight;
367     TensorList bins_in = bin_edges;
368     TensorList bins_out = bin_edges;
369 
370     histogramdd_prepare_out(reshaped_self, std::vector<int64_t>{bin_ct}, hist, bins_out);
371     auto outer_bin_edges = select_outer_bin_edges(reshaped_self, range);
372     at::linspace_out(bin_edges, outer_bin_edges.first[0], outer_bin_edges.second[0], bin_ct + 1);
373 
374     histogramdd_check_inputs(reshaped_self, bins_in, reshaped_weight);
375 
376     histogramdd_linear_stub(reshaped_self.device().type(), reshaped_self, reshaped_weight, density, hist, bin_edges, true);
377     return std::forward_as_tuple(hist, bin_edges);
378 }
379 
380 std::tuple<Tensor, Tensor>
histogram(const Tensor & self,int64_t bin_ct,std::optional<c10::ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density)381 histogram(const Tensor& self, int64_t bin_ct, std::optional<c10::ArrayRef<double>> range,
382         const std::optional<Tensor>& weight, bool density) {
383     Tensor hist = at::empty({0}, self.options(), MemoryFormat::Contiguous);
384     Tensor bin_edges_out = at::empty({0}, self.options());
385     return histogram_out(self, bin_ct, range, weight, density, hist, bin_edges_out);
386 }
387 
388 /* Narrowed interface for the legacy torch.histc function.
389  */
histogram_histc_out(const Tensor & self,int64_t bin_ct,const Scalar & min,const Scalar & max,Tensor & hist)390 Tensor& histogram_histc_out(const Tensor& self, int64_t bin_ct,
391         const Scalar& min, const Scalar& max, Tensor& hist) {
392     Tensor bin_edges = at::empty({0}, self.options());
393 
394     Tensor reshaped = self.reshape({ self.numel(), 1 });
395     TensorList bins_in = bin_edges;
396     TensorList bins_out = bin_edges;
397 
398     histogramdd_prepare_out(reshaped, std::vector<int64_t>{bin_ct}, hist, bins_out);
399 
400     auto outer_bin_edges = histc_select_outer_bin_edges(self, min, max);
401     at::linspace_out(bin_edges, outer_bin_edges.first, outer_bin_edges.second, bin_ct + 1);
402 
403     histogramdd_check_inputs(reshaped, bins_in, {});
404 
405     histogramdd_linear_stub(reshaped.device().type(), reshaped,
406             std::optional<Tensor>(), false, hist, bin_edges, false);
407     return hist;
408 }
409 
histogram_histc(const Tensor & self,int64_t bin_ct,const Scalar & min,const Scalar & max)410 Tensor histogram_histc(const Tensor& self, int64_t bin_ct,
411         const Scalar& min, const Scalar& max) {
412     Tensor hist = at::empty({0}, self.options(), MemoryFormat::Contiguous);
413     return histogram_histc_out(self, bin_ct, min, max, hist);
414 }
415 
histogramdd(const Tensor & self,TensorList bins,std::optional<ArrayRef<double>>,const std::optional<Tensor> & weight,bool density)416 std::tuple<Tensor, std::vector<Tensor>> histogramdd(
417     const Tensor &self, TensorList bins, std::optional<ArrayRef<double>> /*range*/,
418     const std::optional<Tensor> &weight, bool density) {
419   auto hist = at::_histogramdd_from_bin_tensors(self, bins, weight, density);
420   return std::tuple<Tensor, std::vector<Tensor>>{
421       std::move(hist), bins.vec()};
422 }
423 
histogramdd(const Tensor & self,IntArrayRef bins,std::optional<ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density)424 std::tuple<Tensor, std::vector<Tensor>> histogramdd(
425     const Tensor &self, IntArrayRef bins, std::optional<ArrayRef<double>> range,
426     const std::optional<Tensor> &weight, bool density) {
427   auto bin_edges = at::_histogramdd_bin_edges(self, bins, range, weight, density);
428   auto hist = at::_histogramdd_from_bin_cts(self, bins, range, weight, density);
429   return std::tuple<Tensor, std::vector<Tensor>>{
430       std::move(hist), std::move(bin_edges)};
431 }
432 
histogramdd(const Tensor & self,int64_t bins,std::optional<ArrayRef<double>> range,const std::optional<Tensor> & weight,bool density)433 std::tuple<Tensor, std::vector<Tensor>> histogramdd(
434     const Tensor &self, int64_t bins, std::optional<ArrayRef<double>> range,
435     const std::optional<Tensor> &weight, bool density) {
436   DimVector bins_v(self.size(-1), bins);
437   return at::native::histogramdd(self, bins_v, range, weight, density);
438 }
439 
440 } // namespace at::native
441