xref: /aosp_15_r20/external/pytorch/test/cpp/c10d/example/allreduce.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
#include <cerrno>
#include <cstdlib>
#include <iostream>
#include <limits>

#include <c10/util/irange.h>
#include <torch/csrc/distributed/c10d/FileStore.hpp>
#include <torch/csrc/distributed/c10d/ProcessGroupGloo.hpp>
4 
5 using namespace ::c10d;
6 
main(int argc,char ** argv)7 int main(int argc, char** argv) {
8   int rank = atoi(getenv("RANK"));
9   int size = atoi(getenv("SIZE"));
10   auto store = c10::make_intrusive<FileStore>("/tmp/c10d_example", size);
11   ProcessGroupGloo pg(store, rank, size);
12 
13   // Create some tensors
14   const auto ntensors = 10;
15   std::vector<at::Tensor> tensors;
16   for (const auto i : c10::irange(ntensors)) {
17     auto x =
18         at::ones({1000, 16 * (i + 1)}, at::TensorOptions(at::CPU(at::kFloat)));
19     tensors.push_back(x);
20   }
21 
22   // Kick off work
23   std::vector<c10::intrusive_ptr<Work>> pending;
24   for (const auto i : c10::irange(ntensors)) {
25     std::vector<at::Tensor> tmp = {tensors[i]};
26     pending.push_back(pg.allreduce(tmp));
27   }
28 
29   // Wait for work to complete
30   for (auto& work : pending) {
31     work->wait();
32   }
33 }
34