xref: /aosp_15_r20/external/pytorch/aten/src/ATen/test/cpu_profiling_allocator_test.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <gtest/gtest.h>
2 
3 #include <c10/core/CPUAllocator.h>
4 #include <c10/mobile/CPUProfilingAllocator.h>
5 #include <ATen/ATen.h>
6 #include <ATen/Context.h>
7 
run_with_control_flow(at::Tensor input,at::Tensor conv_weight,at::Tensor linear_weight,bool cond,std::vector<void * > & pointers,bool record=false,bool validate=false)8 at::Tensor run_with_control_flow(
9     at::Tensor input,
10     at::Tensor conv_weight,
11     at::Tensor linear_weight,
12     bool cond,
13     std::vector<void*>& pointers,
14     bool record = false,
15     bool validate = false) {
16   if (cond) {
17     input = input * 2;
18   }
19   void* input_ptr = input.data_ptr();
20   auto conv_out = at::conv2d(input, conv_weight);
21   void* conv_out_ptr = input.data_ptr();
22   auto conv_out_flat = conv_out.view({conv_out.size(0), -1});
23   auto output = at::linear(conv_out_flat, linear_weight);
24   if (record) {
25     pointers.push_back(input_ptr);
26     pointers.push_back(conv_out_ptr);
27   }
28   if (validate) {
29     TORCH_CHECK(input_ptr == pointers[0]);
30     TORCH_CHECK(conv_out_ptr == pointers[1]);
31   }
32   return output;
33 }
34 
// Verifies AllocationPlan validation: a plan profiled under one control-flow
// path must be flagged invalid (on mobile) when replayed under the other path,
// since the allocation sequence differs between the two branches.
TEST(CPUAllocationPlanTest, with_control_flow) {
  at::Tensor a = at::rand({23, 16, 16, 16});
  at::Tensor conv_weight = at::rand({16, 16, 3, 3});
  // conv output shape: 23, 16, 14, 14 -> flattened shape: 23, 3136
  at::Tensor linear_weight = at::rand({32, 3136});
  at::Tensor output, ref_output;
  std::vector<void*> pointers;

  // Profiling a run must not throw regardless of which branch is taken.
  auto valid_allocation_plan = [&]() {
    c10::AllocationPlan plan;
    {
      c10::WithProfileAllocationsGuard profile_guard(&plan);
      ref_output = run_with_control_flow(
          a, conv_weight, linear_weight, true, pointers);
    }
  };
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
  ASSERT_NO_THROW(valid_allocation_plan());

  // Profiles a plan with `record_mode`, then replays it ten times with
  // `validation_mode`; returns true only if every replay validated cleanly.
  auto validate_allocation_plan =
    [&](bool record_mode, bool validation_mode) -> bool {
    c10::AllocationPlan plan;
    {
      c10::WithProfileAllocationsGuard profile_guard(&plan);
      ref_output =
        run_with_control_flow(a, conv_weight, linear_weight, record_mode, pointers);
    }
    bool success{true};
    for (uint64_t i = 0; i < 10; ++i) {
      // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
      bool validation_success;
      {
        c10::WithValidateAllocationPlanGuard
          validation_guard(&plan, &validation_success);
        output = run_with_control_flow(
            a, conv_weight, linear_weight, validation_mode, pointers);
      }
      success = success && validation_success;
    }
    return success;
  };
  // Matching branches between profiling and replay must validate.
  ASSERT_TRUE(validate_allocation_plan(true, true));
  ASSERT_TRUE(validate_allocation_plan(false, false));

  #ifdef C10_MOBILE
  // Returning false when record mode != validation mode only applies to
  // DefaultMobileCPUAllocator; DefaultCPUAllocator has no such behavior
  // and will always return true.
  ASSERT_FALSE(validate_allocation_plan(false, true));
  ASSERT_FALSE(validate_allocation_plan(true, false));
  #else
  ASSERT_TRUE(validate_allocation_plan(false, true));
  ASSERT_TRUE(validate_allocation_plan(true, false));
  #endif
}
92 
TEST(CPUAllocationPlanTest,with_profiling_alloc)93 TEST(CPUAllocationPlanTest, with_profiling_alloc) {
94   at::Tensor a = at::rand({23, 16, 16, 16});
95   at::Tensor conv_weight = at::rand({16, 16, 3, 3});
96   // output shape
97   // 23, 16, 14, 14
98   // Flattened shape = 23, 3136
99   at::Tensor linear_weight = at::rand({32, 3136});
100   at::Tensor output, ref_output;
101   std::vector<void*> pointers;
102 
103   auto valid_allocation_plan = [&]() {
104     c10::AllocationPlan plan;
105     {
106       c10::WithProfileAllocationsGuard profile_guard(&plan);
107       ref_output = run_with_control_flow(
108           a, conv_weight, linear_weight, false, pointers);
109     }
110   };
111   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
112   ASSERT_NO_THROW(valid_allocation_plan());
113 
114   auto validate_allocation_plan =
115     [&](bool record_mode,
116         bool validation_mode,
117         bool validate_pointers) {
118       pointers.clear();
119       c10::AllocationPlan plan;
120       {
121         c10::WithProfileAllocationsGuard profile_guard(&plan);
122         ref_output = run_with_control_flow(
123             a,
124             conv_weight,
125             linear_weight,
126             record_mode,
127             pointers,
128             false,
129             false);
130       }
131       c10::CPUProfilingAllocator profiling_allocator;
132       {
133         c10::WithProfilingAllocatorGuard
134           profiling_allocator_guard(&profiling_allocator, &plan);
135         output = run_with_control_flow(
136             a,
137             conv_weight,
138             linear_weight,
139             validation_mode,
140             pointers,
141             validate_pointers,
142             false);
143       }
144       for (uint64_t i = 0; i < 10; ++i) {
145         {
146           c10::WithProfilingAllocatorGuard
147             profiling_allocator_guard(&profiling_allocator, &plan);
148           output = run_with_control_flow(
149               a,
150               conv_weight,
151               linear_weight,
152               validation_mode,
153               pointers,
154               false,
155               validate_pointers);
156         }
157       }
158   };
159   // When control flow conditions are same between profiling and evaluation
160   // profiling allocator should not throw.
161   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
162   ASSERT_NO_THROW(validate_allocation_plan(true, true, false));
163   ASSERT_TRUE(ref_output.equal(output));
164   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
165   ASSERT_NO_THROW(validate_allocation_plan(false, false, false));
166   ASSERT_TRUE(ref_output.equal(output));
167 
168   // Returning the same pointers is not a guarantee when the default
169   // allocator is used. It looks like the underlying memory pointer
170   // can change as long as output and ref_output remain equal. This
171   // has already been confirmed in the previous two tests
172   #ifdef C10_MOBILE
173   // Furthermore profiling allocator should return the same pointers
174   // back for the intermediate tensors
175   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
176   ASSERT_NO_THROW(validate_allocation_plan(true, true, true));
177   ASSERT_TRUE(ref_output.equal(output));
178   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
179   ASSERT_NO_THROW(validate_allocation_plan(false, false, true));
180   ASSERT_TRUE(ref_output.equal(output));
181 
182   // When control flow conditions are different between profiling and evaluation
183   // profiling allocator should throw.
184   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
185   ASSERT_THROW(validate_allocation_plan(true, false, false), c10::Error);
186   // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
187   ASSERT_THROW(validate_allocation_plan(false, true, false), c10::Error);
188   #else
189   // Throwing when record mode != validation mode only applies to
190   // DefaultMobileCPUAllocator, DefaultCPUAllocator has no such
191   // behavior and throw nothing
192   ASSERT_NO_THROW(validate_allocation_plan(true, false, false));
193   ASSERT_NO_THROW(validate_allocation_plan(false, true, false));
194   #endif
195 }
196 
main(int argc,char * argv[])197 int main(int argc, char* argv[]) {
198   // Setting the priority high to make sure no other allocator gets used instead of this.
199   c10::SetCPUAllocator(c10::GetDefaultCPUAllocator(), /*priority*/ 100);
200 
201   #ifdef C10_MOBILE
202   // Need to disable mkldnn for this test since it allocated memory
203   // via raw_allocate inteface which requires context pointer and raw
204   // pointer to be the same. Tis is not true for mobile allocator.
205   at::globalContext().setUserEnabledMkldnn(false);
206   #endif
207 
208   ::testing::InitGoogleTest(&argc, argv);
209   at::manual_seed(42);
210   return RUN_ALL_TESTS();
211 }
212