# flake8: noqa: B950
from ._internal import register_artifact, register_log


DYNAMIC = [
    "torch.fx.experimental.symbolic_shapes",
    "torch.fx.experimental.sym_node",
    "torch.fx.experimental.recording",
]
DISTRIBUTED = [
    "torch.distributed",
    "torch._dynamo.backends.distributed",
    "torch.nn.parallel.distributed",
]

register_log("dynamo", ["torch._dynamo", *DYNAMIC])
register_log("fake_tensor", ["torch._subclasses.fake_tensor"])
register_log("aot", ["torch._functorch.aot_autograd", "torch._functorch._aot_autograd"])
register_log("autograd", "torch.autograd")
register_log("inductor", ["torch._inductor", "torch._inductor.cudagraph_trees"])

register_artifact(
    "cudagraphs",
    "Logs information from wrapping Inductor-generated code with cudagraphs.",
)

register_log("dynamic", DYNAMIC)
register_log("torch", "torch")
register_log("distributed", DISTRIBUTED)
register_log(
    "c10d", ["torch.distributed.distributed_c10d", "torch.distributed.rendezvous"]
)
register_log(
    "ddp", ["torch.nn.parallel.distributed", "torch._dynamo.backends.distributed"]
)
register_log("pp", ["torch.distributed.pipelining"])
register_log("fsdp", ["torch.distributed.fsdp", "torch.distributed._composable.fsdp"])
register_log("dtensor", ["torch.distributed._tensor", "torch.distributed.tensor"])
register_log("onnx", "torch.onnx")
register_log(
    "export",
    [
        "torch._dynamo",
        "torch.export",
        "torch.export.dynamic_shapes",
        *DYNAMIC,
        "torch._export.converter",
        "torch._export.non_strict_utils",
    ],
)

register_artifact(
    "guards",
    "Prints the guards for every compiled Dynamo frame. It does not tell you where the guards come from.",
    visible=True,
)
register_artifact("verbose_guards", "", off_by_default=True)
register_artifact(
    "bytecode",
    "Prints the original and modified bytecode from Dynamo. Mostly useful if you're debugging our bytecode generation in Dynamo.",
    off_by_default=True,
)
register_artifact(
    "graph",
    "Prints the Dynamo-traced graph (prior to AOTDispatch) in a table. If you prefer Python code, use `graph_code` instead.",
)
register_artifact("graph_code", "Like `graph`, but gives you the Python code instead.")
register_artifact(
    "graph_sizes", "Prints the sizes of all FX nodes in the Dynamo graph."
)
register_artifact(
    "trace_source",
    "As we execute bytecode, prints the file name / line number we are processing and the actual source code. Useful with `bytecode`.",
)
register_artifact(
    "trace_call",
    "Like `trace_source`, but gives you a per-expression blow-by-blow if your Python is recent enough.",
)
register_artifact(
    "trace_bytecode",
    "As we trace bytecode, prints the instruction and the current stack.",
)
register_artifact(
    "aot_graphs",
    "Prints the FX forward and backward graphs generated by AOTDispatch, after partitioning. Useful to understand what's being given to Inductor.",
    visible=True,
)
register_artifact(
    "aot_joint_graph",
    "Prints the FX joint graph from AOTAutograd, prior to partitioning. Useful for debugging partitioning.",
)
register_artifact(
    "aot_graphs_effects",
    "Prints the FX forward and backward graphs generated by AOTDispatch. Useful for debugging effects processing.",
    visible=True,
)
register_artifact(
    "post_grad_graphs",
    "Prints the FX graph generated by the post-grad passes. Useful to understand what's being given to Inductor after those passes.",
)
register_artifact(
    "compiled_autograd",
    "Prints various logs in compiled_autograd, including but not limited to the graphs. Useful for debugging compiled_autograd.",
    visible=True,
)
register_artifact(
    "compiled_autograd_verbose",
    "Will affect performance. Prints compiled_autograd logs with C++ info, e.g. the autograd node -> FX node mapping.",
    off_by_default=True,
)
register_artifact(
    "ddp_graphs",
    "Only relevant when compiling DDP. DDP splits the model into multiple graphs to trigger comms early; this prints each individual graph.",
)
register_artifact(
    "recompiles",
    "Prints the reason why we recompiled a graph. Very, very useful.",
    visible=True,
)
register_artifact(
    "recompiles_verbose",
    "Prints all guard checks that fail during a recompilation. "
    "At runtime, Dynamo will stop at the first failed check for each failing guard, "
    "so not all logged failing checks are actually run by Dynamo.",
    visible=True,
    off_by_default=True,
)
register_artifact(
    "graph_breaks",
    "Prints whenever Dynamo decides that it needs to graph break (i.e. create a new graph). Useful for debugging why torch.compile has poor performance.",
    visible=True,
)
register_artifact(
    "not_implemented",
    "Prints log messages whenever we return NotImplemented in a multi-dispatch, letting you trace through each object we attempted to dispatch to.",
)
register_artifact(
    "output_code",
    "Prints the code that Inductor generates (either Triton or C++).",
    off_by_default=True,
    visible=True,
)
register_artifact(
    "kernel_code",
    "Prints the code that Inductor generates, on a per-kernel basis.",
    off_by_default=True,
    visible=True,
)
register_artifact(
    "schedule",
    "Inductor scheduler information. Useful if working on the Inductor fusion algorithm.",
    off_by_default=True,
)
register_artifact("perf_hints", "", off_by_default=True)
register_artifact("onnx_diagnostics", "", off_by_default=True)
register_artifact(
    "fusion",
    "Detailed Inductor fusion decisions. More detailed than `schedule`.",
    off_by_default=True,
)
register_artifact(
    "loop_ordering",
    "Logs related to loop ordering.",
    off_by_default=True,
)
register_artifact(
    "overlap",
    "Detailed Inductor compute/comm overlap decisions.",
    off_by_default=True,
)
register_artifact(
    "sym_node",
    "Logs extra info for various SymNode operations.",
    off_by_default=True,
)
register_artifact(
    "trace_shape_events",
    "Logs traces for every ShapeEnv operation that we record for replay.",
    off_by_default=True,
)
register_artifact(
    "cudagraph_static_inputs",
    "Logs static input handling in Dynamo, AOT, and cudagraphs.",
    off_by_default=True,
)
register_artifact(
    "benchmarking",
    "Detailed Inductor benchmarking information.",
    off_by_default=True,
)

register_artifact("custom_format_test_artifact", "Testing only", log_format="")
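
# Usage sketch (illustrative only; nothing below is executed by this module):
# the log and artifact names registered above are the names accepted by the
# TORCH_LOGS environment variable and by torch._logging.set_logs(). Log names
# take logging levels; artifact names take booleans. `train.py` is a
# hypothetical user script.
#
#     TORCH_LOGS="dynamo,guards,recompiles" python train.py
#
# or, programmatically:
#
#     import logging
#     import torch._logging
#     torch._logging.set_logs(dynamo=logging.DEBUG, guards=True, recompiles=True)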