#include <torch/csrc/jit/ir/ir.h>

namespace torch::jit {

TORCH_API void FuseInferenceOpsForSparseNN(
    std::shared_ptr<torch::jit::Graph>& graph);

TORCH_API void EliminateTrivialEquallySplit(
    std::shared_ptr<torch::jit::Graph>& graph);

TORCH_API void FuseListUnpack(std::shared_ptr<torch::jit::Graph>& graph);

// If outputs_are_immutable is set to false, view ops that produce aliases of
// graph outputs are not replaced with their copy variants.
TORCH_API void ReplaceWithCopy(
    std::shared_ptr<torch::jit::Graph>& graph,
    bool outputs_are_immutable = true);

TORCH_API void ReplacePermuteWithCopy(
    std::shared_ptr<torch::jit::Graph>& graph,
    bool outputs_are_immutable = true);

TORCH_API void ReplaceWithMaybeCopy(
    std::shared_ptr<torch::jit::Graph>& graph,
    bool outputs_are_immutable = true);
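
// A minimal sketch of what the Replace*WithCopy passes above do. The rewritten
// op and the replacement op name below are assumptions for illustration; the
// authoritative list lives in the pass implementations.
//
//   Before (the output is a view that aliases the input):
//     %y : Tensor = aten::permute(%x, %dims)
//   After (an out-variant that writes into a tensor owned by the runtime):
//     %y : Tensor = static_runtime::permute_copy(%x, %dims)
//
// When outputs_are_immutable is false, view ops whose results alias graph
// outputs are left untouched, since callers may rely on that aliasing.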

TORCH_API void RemoveImmutableInputDictLookups(
    std::shared_ptr<torch::jit::Graph>& graph);

TORCH_API bool graphHasOp(std::shared_ptr<Graph>& graph, const char* op_name);

TORCH_API bool forwardHasOp(const Module& module, const char* op_name);

TORCH_API void FuseSignLog1P(std::shared_ptr<Graph>& graph);

TORCH_API void UseVariadicTupleUnpack(const std::shared_ptr<Graph>& graph);

// c10::Symbol::fromQualString is a bit long to type everywhere, and
// we can't use a `using` statement since it's a static class function.
inline c10::Symbol fromQualString(const std::string& qual_string) {
  return c10::Symbol::fromQualString(qual_string);
}
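
// Example usage of the shorthand above:
//   const auto symbol = fromQualString("aten::embedding_bag");
//   // ...equivalent to c10::Symbol::fromQualString("aten::embedding_bag")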

// [Create owned refs for special values]
// StaticRuntimeBlockRunner moves its outputs to the return value at the end of
// run_impl. However, there's a corner case where this can cause problems. If
// we return a constant, then the only reference in the constants_ array can
// be destroyed by this move.
// We could add special logic to handle this in run_impl. But since this is a
// relatively rare corner case, it's simpler to just add an op that does nothing
// but create an owned reference to its input. This owned reference can be
// safely moved out of StaticRuntimeBlockRunner. Note that for scalars,
// this actually does a copy.
// Note that we have to do the same thing if we are returning a value from an
// outer scope in a sub-block.
TORCH_API void CreateOwnedRefsForSpecialValues(Graph& graph);
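
// A rough sketch of the transformation, assuming an op named
// static_runtime::create_owned_ref (the exact op name is an implementation
// detail of the pass):
//
//   Before (the returned constant's only reference lives in constants_):
//     graph(%x):
//       %c : int = prim::Constant[value=42]()
//       return (%c)
//   After (the return value is an owned reference that can be moved safely):
//     graph(%x):
//       %c : int = prim::Constant[value=42]()
//       %c_ref : int = static_runtime::create_owned_ref(%c)
//       return (%c_ref)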

// [Force non-empty outputs]
// It is technically possible for sub-blocks to not return anything. This is
// problematic for StaticRuntimeBlockRunner because it assumes that at least one
// output is being returned. Rather than slowing down SR with special logic for
// this corner case, we simply force blocks that return nothing to return None.
TORCH_API void ForceNonEmptyOutputs(Graph& graph);
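
// A rough sketch, using an if sub-block as an example (the exact IR produced
// may differ):
//
//   Before (the block returns nothing):
//     block0():
//       ...
//       -> ()
//   After (the block returns a None constant so there is at least one output):
//     block0():
//       %none : NoneType = prim::Constant()
//       -> (%none)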

TORCH_API void UseVariadicGroupedAccessor(const std::shared_ptr<Graph>& graph);

TORCH_API void EliminateExtraPermuteOps(std::shared_ptr<Graph>& graph);

TORCH_API void EliminateNoOpSlice(std::shared_ptr<Graph>& graph);

TORCH_API void UseSplitAndSqueeze(std::shared_ptr<Graph>& graph);

// [Remove unnecessary outputs]
// Removes outputs that are not used later in the graph to reduce compute.
// Currently used to remove the max_indices output of embedding_bag, which
// isn't necessary to compute the main output.
TORCH_API void RemoveUnnecessaryOutputs(std::shared_ptr<Graph>& graph);

TORCH_API void RemoveUnnecessaryEmbeddingBagOutputs(
    std::shared_ptr<Graph>& graph);
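
// Illustrative sketch for the embedding_bag case mentioned above, assuming
// only the first output is consumed downstream:
//
//   %out, %offset2bag, %bag_size, %max_indices = aten::embedding_bag(...)
//   return (%out)
//
// The unused %max_indices output (and the work needed to produce it) can be
// dropped; the replacement op chosen is an implementation detail of the pass.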

TORCH_API void FuseClampNaNToNum(std::shared_ptr<Graph>& graph);

TORCH_API void UseInPlaceGetRealInputsFromOptionalInputsV2(
    std::shared_ptr<Graph>& graph);

TORCH_API void PrepackWeights(std::shared_ptr<Graph>& graph);

} // namespace torch::jit