#pragma once

#include <ATen/ATen.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/jit_type.h>
#include <ATen/core/stack.h>
#include <torch/csrc/Export.h>
#include <torch/csrc/jit/ir/ir.h>

#include <list>
#include <map>
#include <unordered_map>
#include <vector>

// We would like to assign each position/axis of a tensor an abstract size
// * For each `tensor` we have a profiled `Value` of a `TensorType` describing
//   the properties of the `tensor`.
// * `TensorType` has a property called `symbolic_sizes_` to describe observed
//   `tensor.sizes()`
// * `symbolic_sizes_` is a vector of abstract sizes (or
//   `std::vector<ShapeSymbol>`) where
//   * `ShapeSymbol` at `symbolic_sizes_[i]` describes the size value
//     (`Dimension`) at `tensor.sizes()[i]`
// * We may see the same `Dimension` at different positions `i` in
//   `tensor.sizes()` or even in different `tensor`s
// * First, we would like to associate the same `ShapeSymbol` with the same
//   `Dimension` across **one** profiling execution or run of a TorchScript
//   function.
// * The same `ShapeSymbol`s in different positions of `symbolic_sizes_` in
//   possibly different `TensorType`s (i.e. `TensorType`s for different
//   profiled values) form an implicit set. The elements of such a set are
//   called *dimension locations*.
// * These sets allow us to track how the shapes of input arguments of some
//   operation relate to the operation's output shapes, as the input and
//   output shapes might share the same `ShapeSymbol`s.
// * For **every** profiling run, we would like to maintain the invariant that
//   *the same `ShapeSymbol` is always associated with the same `Dimension`*.
// * To maintain this invariant we merge the profiling information from all
//   profiling runs.
// * For every two runs, we iterate over all `symbolic_sizes_` and compare
//   their `ShapeSymbol`s in the same position.
//   * If we observe that for every dimension location that has
//     `ShapeSymbol S1` in run #1 there is **only one** `ShapeSymbol S2` in
//     the same dimension location in run #2, we conclude that the invariant
//     holds.
//   * However, if we observe that some dimension locations in run #2 have
//     `ShapeSymbol S2` and the other ones have `ShapeSymbol S3`, we would
//     like to partition the virtual set of dimension locations associated
//     with `ShapeSymbol S1` into two new subsets, so the invariant holds.
//   * The partitioning works by assigning a new symbol to the dimension
//     locations (associated with `ShapeSymbol S1`) that have `ShapeSymbol S2`
//     and another new symbol to the dimension locations that have
//     `ShapeSymbol S3`. In other words,
//     * Subset #1 will consist of the dimension locations that in run #2
//       have `ShapeSymbol S2` and will have `ShapeSymbol S4` in those
//       dimension locations
//     * Subset #2 will consist of the dimension locations that in run #2
//       have `ShapeSymbol S3` and will have `ShapeSymbol S5` in those
//       dimension locations
// * The effective result of merging the profiling information from two runs
//   is new `TensorType`s whose `symbolic_sizes_`/dimension locations have
//   either `ShapeSymbol S4` or `ShapeSymbol S5`.
// * Partitioning can be done even before we have seen all the dimension
//   locations associated with `ShapeSymbol S1`
//   * We use `getSymbolInSet` of `ShapeSymbolTable` to remember all
//     `ShapeSymbol`s from run #2 we observed in the dimension locations
//     associated with `ShapeSymbol S1`.
//   * For every `ShapeSymbol` from run #2 in the dimension location
//     associated with `ShapeSymbol S1`, `getSymbolInSet` returns a symbol
//     that we assign to the dimension location in a new TensorType.
71 // * It's important to point out that the same `ShapeSymbol S2` from run 72 // #2 in two dimension locations that have different `ShapeSymbol`s in 73 // run #1 are different! These dimension locations will belong to 74 // different subsets and have different `ShapeSymbol`s after merge. 75 // * On the other hand, for the same `ShapeSymbol S2` in two dimension 76 // locations that have `ShapeSymbol S1` in run #1`getSymbolInSet` will 77 // return the same symbol. 78 79 namespace torch::jit { 80 81 using ::c10::TensorTypePtr; 82 using Dimension = int64_t; 83 84 TORCH_API void RegisterProfilingNode(const std::function<bool(const Node*)>&); 85 86 struct ProfilingRecord; 87 88 // `SetPartitioningHelper` is used to maintain the following invariant: 89 // For **every** profiling run, *the same `ShapeSymbol` is always associated 90 // with the same `Dimension`*. 91 // while merging the profiling information from multiple runs. 92 struct SetPartitioningHelper { 93 std::map<c10::ShapeSymbol, std::map<Dimension, c10::ShapeSymbol>> 94 sets2subsets_; 95 96 // `partitionSetByDimension` partitions a virtual set 97 // of dimension locations associated with ShapeSymbol `symbol` into subsets. 98 // Partitioning is equivalent to giving (or renaming) a particular 99 // dimension location a new `ShapeSymbol`. 100 // The same `Dimension` value in different dimension locations 101 // that used to have `symbol` will receive the same 102 // new `ShapeSymbol`, effectively forming a new set. 
partitionSetByDimensionSetPartitioningHelper103 c10::ShapeSymbol partitionSetByDimension( 104 Dimension new_size, 105 c10::ShapeSymbol symbol) { 106 auto& dims2symbols = getSetForSymbol(symbol); 107 108 if (dims2symbols.count(new_size) == 0) { 109 auto new_sym = c10::ShapeSymbol::newSymbol(); 110 dims2symbols[new_size] = new_sym; 111 return new_sym; 112 } 113 114 return dims2symbols[new_size]; 115 } 116 117 private: getSetForSymbolSetPartitioningHelper118 std::map<Dimension, c10::ShapeSymbol>& getSetForSymbol(c10::ShapeSymbol s) { 119 auto& set = sets2subsets_[s]; 120 // N.B. adding a mapping { s.static_size(), s } 121 // makes sure we preserve the fact that 122 // some dimension values remain the same 123 // across all profiled runs 124 if (s.is_static()) { 125 set.insert({s.static_size(), s}); 126 } 127 return set; 128 } 129 }; 130 131 // ShapeSymbolTable is used by Interpreter 132 // to assign dimension values to ShapeSymbols 133 // and fail a guard if the same symbol 134 // is assigned more than one dimension value. 135 struct ShapeSymbolTable { 136 // N.B. we treat static symbols as always assigned 137 // to themselves isBoundShapeSymbolTable138 bool isBound(c10::ShapeSymbol s) { 139 if (s.is_static()) { 140 return true; 141 } 142 return data_.count(s) != 0; 143 } 144 145 // N.B. we treat static symbols as always assigned 146 // to themselves getValueShapeSymbolTable147 Dimension getValue(c10::ShapeSymbol s) { 148 if (s.is_static()) { 149 return s.static_size(); 150 } 151 return data_[s]; 152 } assignShapeSymbolTable153 void assign(c10::ShapeSymbol s, Dimension v) { 154 TORCH_INTERNAL_ASSERT(!s.is_static()); 155 data_[s] = v; 156 } 157 std::map<c10::ShapeSymbol, Dimension> data_; 158 // Tries to assign dimension values from `new_sizes` to 159 // `ShapeSymbol`s `sym_shapes`. 
160 // Returns `true` if every dimension value from `new_sizes` 161 // can be assigned to the corresponding `ShapeSymbol` from 162 // `sym_shapes` 163 // A dimension value can be assigned to a `ShapeSymbol` 164 // * if the symbol isn't assigned yet any dimension value 165 // * if the symbol is assigned and its value is equal to 166 // the dimension value from `new_sizes` 167 bool bindSymbolicShapes( 168 at::IntArrayRef new_sizes, 169 const c10::SymbolicShape& sym_shapes); 170 }; 171 172 struct ProfilingRecord { 173 // N.B. ProfilingRecord's copy and move c-tor are disabled, so we won't 174 // end up accidentally copying or moving ProfilingRecords whose addresses 175 // are captured in callbacks_ 176 ProfilingRecord(const ProfilingRecord&) = delete; 177 ProfilingRecord(ProfilingRecord&&) noexcept = delete; 178 TORCH_API static std::unique_ptr<ProfilingRecord> instrumentGraph( 179 const std::shared_ptr<Graph>& graph); 180 TORCH_API static void removeProfilingNodes(Block* b); 181 TORCH_API static void removeProfileCounter(Block* b); 182 183 std::shared_ptr<Graph> profiled_graph_; 184 mutable std::mutex mutex_; 185 size_t profiling_count_; 186 187 bool ready() const; 188 graphProfilingRecord189 std::shared_ptr<Graph> graph() const { 190 return profiled_graph_; 191 } 192 193 TORCH_API ProfileIValueOp* createProfileIValueNode(Value* in_val); 194 TORCH_API ProfileIValueOp* createProfileIValueNode(ArrayRef<Value*> inputs); 195 196 private: 197 ProfileOp* createProfileNode( 198 const std::function<void(Stack&)>& fp, 199 at::ArrayRef<Value*> inputs); 200 void instrumentBlock(Block* block); 201 void insertShapeProfile(Node* n, size_t offset, const TypePtr& input_type); 202 ProfilingRecord(std::shared_ptr<Graph> g); 203 }; 204 205 } // namespace torch::jit 206