# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

"""
Intermediate Representation of ExecuTorch Concepts in Developer Tools
"""

from __future__ import annotations

import operator
import warnings

from collections import defaultdict

from enum import Enum
from types import NoneType
from typing import Any, Dict, List, Optional, Set, Tuple

import torch
from executorch import exir
from executorch.devtools.debug_format.base_schema import (
    Node,
    OperatorGraph,
    OperatorNode,
    ValueNode,
)
from torch._subclasses import FakeTensor


# Keywords used in debug_format Metadata
class RESERVED_METADATA_ARG(Enum):
    DEBUG_HANDLE = "debug_handle"
    MODULE_STACK = "nn_module_stack"
    SOURCE_FN_STACK = "source_fn_stack"
    MODULE_TYPE = "module_type"
    PROFILE_START_TIME = "profile_start_time"
    PROFILE_END_TIME = "profile_end_time"
    LOAD_START_TIME = "load_start_time"
    LOAD_END_TIME = "load_end_time"
    MEMORY_USAGE = "memory_usage"
    DEBUG_ENTRY = "debug_entry"
    STACK_TRACE = "stack_trace"
    DEBUG_DATA = "debug_data"

    METRICS_KEYWORD = "metrics"
    PROFILE_SUMMARY_COLDSTART = "Coldstart"
    PROFILE_SUMMARY_AVERAGE = "Average"
    PROFILE_SUMMARY_P90 = "P90"
    PROFILE_SUMMARY_P10 = "P10"
    PROFILE_SUMMARY_MIN = "Min"
    PROFILE_SUMMARY_MAX = "Max"

    AGGREGATED_OP_TABLE = "Aggregated op stats"
    RUN_SUMMARY_INDIVIDUAL_RUNS_TABLE = "Run summary individual stats"
    RUN_SUMMARY_TABLE = "Aggregated run summary stats"
    OP_INSTANCE_SUMMARY_TABLE = "Individual op stats"

    TABLES_KEYWORD = "tables"
    KV_KEYWORD = "kv"
    MODEL_LOAD_TIME_KEY = "Model load time (ms)"


# Representation of an FX GraphModule as an OperatorGraph
class FXOperatorGraph(OperatorGraph):
    @staticmethod
    def _get_node_name(node: torch.fx.Node) -> str:
        if node.target == operator.getitem:
            # pyre-ignore[9]: Incompatible variable type
            node = node.args[0]
            assert isinstance(
                node, torch.fx.Node
            ), f"First argument of getitem must be a torch fx node. Got {node.args[0]}"

        # Adding the "_" to the node name prevents TensorBoard from collapsing
        # nodes with similar names that only differ by an integer at the end of
        # their name.
        return node.name + "_"

    @staticmethod
    def _get_op_name(node: torch.fx.Node) -> str:
        # pyre-ignore[16]: has no attribute `__name__`.
        return node.target.__name__

    # Given a node and its metadata (if it contains a module stack), update the provided module mappings
    @staticmethod
    def _update_module_mapping(
        node: Node,
        module_mapping: Dict[Tuple[str, str], List[Node]],
        metadata: Dict[str, Any],
    ):
        if (
            source_fn_stack := metadata.get("source_fn_stack")
        ) is not None and "nn_module_stack" in metadata:
            # (module name, module type)
            source_fn = source_fn_stack[-1]
            module_type = (
                source_fn[1] if isinstance(source_fn[1], str) else source_fn[1].__name__
            )
            module_mapping[(source_fn[0], module_type)].append(node)

    @staticmethod
    def _parse_args(  # noqa: C901
        node: torch.fx.Node,
        nodes: Dict[str, Node],
        const_count: int,
        module_mapping: Dict[Tuple[str, str], List[Node]],
        enable_module_hierarchy: bool,
    ) -> Tuple[List[Node], int]:
        inputs = []
        op = node.op
        name = node.name
        args = node.args
        kwargs = node.kwargs
        named_args = None
        if node.op == "call_function" and hasattr(node.target, "_schema"):
            # pyre-ignore
            named_args = node.target._schema.arguments

        for index, arg in enumerate(args):
            if isinstance(arg, torch.fx.node.Node):
                if arg.target == exir.memory.alloc:
                    continue
                arg_name = FXOperatorGraph._get_node_name(arg)
            elif isinstance(arg, (int, float, torch.dtype, str)):
                # e.g. The "0" from node.args of squeeze_copy (mm_default, 0)
                if named_args and len(named_args) > index:
                    arg_name = named_args[index].name + "_" + str(const_count)
                else:
                    arg_name = "CONST_" + str(const_count)
                const_count += 1
                const_node = ValueNode(arg_name, val=str(arg))
                nodes[arg_name] = const_node
                if enable_module_hierarchy:
                    FXOperatorGraph._update_module_mapping(
                        const_node, module_mapping, node.meta
                    )
            elif isinstance(arg, list):
                arg_name: List[str] = []
                for list_arg in arg:
                    if isinstance(list_arg, (int, float)):
                        # Consider the whole list of ints/floats as a single constant and
                        # stringify that.
                        if named_args and len(named_args) > index:
                            arg_name = [named_args[index].name + "_" + str(const_count)]
                        else:
                            arg_name = ["CONST_" + str(const_count)]
                        const_count += 1
                        const_node = ValueNode(arg_name[0], val=arg)
                        nodes[arg_name[0]] = const_node
                        if enable_module_hierarchy:
                            FXOperatorGraph._update_module_mapping(
                                const_node, module_mapping, node.meta
                            )
                        break
                    elif isinstance(list_arg, torch.fx.node.Node):
                        arg_name += [FXOperatorGraph._get_node_name(list_arg)]
                    elif list_arg is None:
                        arg_name += ["CONST_NONE" + str(const_count)]
                        const_count += 1
                        const_node = ValueNode(arg_name[-1], val=str(arg))
                        nodes[arg_name[-1]] = const_node
                        if enable_module_hierarchy:
                            FXOperatorGraph._update_module_mapping(
                                const_node, module_mapping, node.meta
                            )
                    else:
                        raise Exception(
                            f"Unsupported argument encountered in list {arg}, {type(list_arg)}"
                        )
            elif isinstance(arg, NoneType):
                continue
            else:
                raise Exception(
                    f"Unsupported argument encountered {op}, {name}, {arg}, {type(arg)}"
                )

            if isinstance(arg_name, list):
                for val in arg_name:
                    inputs.append(nodes[val])
            else:
                inputs.append(nodes[arg_name])
        for _, node in kwargs.items():
            # We can ignore the out kwarg as that's mostly used to pass in the output tensor
            # which has been memory planned. The same value is also returned by the operator
            # which is then consumed by other nodes in the graph.
            if (
                isinstance(node, torch.fx.node.Node)
                and node.target == exir.memory.alloc
            ):
                continue
            else:
                warnings.warn(
                    f"Unsupported kwarg encountered: {name}, {kwargs}", stacklevel=1
                )

        return inputs, const_count

    # Given an FX GraphModule, parse it into an OperatorGraph
    @staticmethod
    def gen_operator_graph(
        model: torch.fx.GraphModule,
        skip_stack_trace: Optional[bool] = False,
        enable_module_hierarchy: bool = False,
    ) -> FXOperatorGraph:
        graph: torch.fx.Graph = model.graph

        nodes = {}
        input_nodes = {}
        output_nodes = {}
        out_variant_output_nodes = set()
        module_mapping = defaultdict(list)

        const_count = 0
        for fx_node in graph.nodes:
            if (
                fx_node.target == exir.memory.alloc
                or fx_node.target == operator.getitem
            ):
                continue
            op = fx_node.op
            name = FXOperatorGraph._get_node_name(fx_node)
            target = fx_node.target
            args = fx_node.args
            kwargs = fx_node.kwargs
            metadata = FXOperatorGraph._extract_metadata(fx_node.meta, skip_stack_trace)
            output_shapes = FXOperatorGraph._extract_output_shapes(
                fx_node.meta.get("val")
            )
            dtype = FXOperatorGraph._extract_output_dtype(fx_node.meta.get("val")) or ""

            assert (
                op != "call_module"
            ), f"Module call not yet supported in edge model graph [.toEdge()]: {name}, {str(target)}"
            assert (
                op != "call_method"
            ), f"Call Method not yet supported in edge model graph [.toEdge()]: {name}, {str(target)}"

            # Input
            if op == "placeholder":
                node = ValueNode(
                    name,
                    output_shapes=output_shapes,
                    metadata=metadata,
                    dtype=str(dtype),
                )  # val is default arg
                input_nodes[name] = node
            # Constants
            elif op == "get_attr":
                node = ValueNode(
                    name,
                    output_shapes=output_shapes,
                    metadata=metadata,
                    dtype=str(dtype),
                )
            # Output
            elif op == "output":
                assert len(args) == 1
                # The args of an op=="output" node are a wrapped list of return nodes ([ret_1, ret_2, ...], )
                in_nodes = [
                    (
                        nodes[FXOperatorGraph._get_node_name(ret)]
                        if ret is not None
                        else []
                    )
                    for ret in args[0]
                ]
                node = ValueNode(
                    name,
                    inputs=in_nodes,
                    output_shapes=output_shapes,
                    metadata=metadata,
                    dtype=str(dtype),
                )
                output_nodes[name] = node
            # Op Calls
            elif op == "call_function":
                inputs, const_count = FXOperatorGraph._parse_args(
                    fx_node, nodes, const_count, module_mapping, enable_module_hierarchy
                )
                named_args = []
                if fx_node.op == "call_function" and hasattr(fx_node.target, "_schema"):
                    named_args = [arg.name for arg in fx_node.target._schema.arguments]
                node = OperatorNode(
                    name,
                    inputs=inputs,
                    output_shapes=output_shapes,
                    metadata=metadata,
                    op=FXOperatorGraph._get_op_name(fx_node),
                    named_args=named_args,
                )
                if enable_module_hierarchy:
                    FXOperatorGraph._update_module_mapping(
                        node, module_mapping, fx_node.meta
                    )

                for kwarg_name, kwarg in kwargs.items():
                    if (
                        isinstance(kwarg, torch.fx.node.Node)
                        and kwarg.target == exir.memory.alloc
                        and kwarg_name == "out"
                    ):
                        nodes[FXOperatorGraph._get_node_name(kwarg)] = node
                        out_variant_output_nodes.add(
                            FXOperatorGraph._get_node_name(kwarg)
                        )
            else:
                raise Exception(f"Unsupported op type encountered {op}, {name}")

            nodes[name] = node
        return FXOperatorGraph._compose_op_graph(
            "base",
            nodes,
            input_nodes,
            output_nodes,
            out_variant_output_nodes,
            module_mapping,
        )

    @staticmethod
    def _compose_op_graph(
        name: str,
        nodes: Dict[str, Node],
        input_nodes: Dict[
            str, Node | OperatorGraph
        ],  # Never OperatorGraph, annotated for Pyre
        output_nodes: Dict[
            str, Node | OperatorGraph
        ],  # Never OperatorGraph, annotated for Pyre
        out_variant_output_nodes: Set[str],
        module_mapping: Dict[
            Tuple[str, str], List[Any]
        ],  # Any used here for Pyre, list of Nodes
    ):
        # Generate Module Graphs
        module_graphs: List[OperatorGraph] = []
        for module_key, module_nodes in module_mapping.items():
            module_element = OperatorGraph(
                graph_name=module_key[0],
                elements=module_nodes,
                metadata={"module_type": module_key[1]},
            )
            module_graphs.append(module_element)

            # Remove module nodes from the main graph
            for node in module_nodes:
                nodes.pop(node.name)

        main_nodes = [
            node
            for name, node in nodes.items()
            if name not in input_nodes
            and name not in output_nodes
            and name not in out_variant_output_nodes
        ]
        main_graph = FXOperatorGraph(
            graph_name="forward", elements=main_nodes + module_graphs
        )
        input_graph = FXOperatorGraph(
            graph_name="inputs", elements=list(input_nodes.values())
        )
        output_graph = FXOperatorGraph(
            graph_name="outputs", elements=list(output_nodes.values())
        )

        return FXOperatorGraph(
            graph_name=name,
            elements=[input_graph, main_graph, output_graph],
        )

    # Given a dict, extract only the utilized metadata
    @staticmethod
    def _extract_metadata(
        metadata: Dict[str, Any], skip_stack_trace: Optional[bool] = False
    ) -> Dict[str, Any]:
        ret = {}
        if RESERVED_METADATA_ARG.DEBUG_HANDLE.value in metadata:
            ret[RESERVED_METADATA_ARG.DEBUG_HANDLE.value] = metadata[
                RESERVED_METADATA_ARG.DEBUG_HANDLE.value
            ]
        if not skip_stack_trace and RESERVED_METADATA_ARG.STACK_TRACE.value in metadata:
            ret[RESERVED_METADATA_ARG.STACK_TRACE.value] = metadata[
                RESERVED_METADATA_ARG.STACK_TRACE.value
            ]
        if RESERVED_METADATA_ARG.MODULE_STACK.value in metadata:
            ret[RESERVED_METADATA_ARG.MODULE_STACK.value] = metadata[
                RESERVED_METADATA_ARG.MODULE_STACK.value
            ]
        return ret

    @staticmethod
    def _extract_output_shapes(val: Any) -> Optional[List[List[int]]]:
        if isinstance(val, (FakeTensor, torch.Tensor)):
            # If val is a single tensor
            return [list(val.shape)]
        elif isinstance(val, tuple) and all(
            isinstance(tensor, (FakeTensor, torch.Tensor)) for tensor in val
        ):
            # If val is a tuple of tensors
            shapes = [list(fake_tensor.shape) for fake_tensor in val]
            return shapes
        else:
            return None

    @staticmethod
    def _extract_output_dtype(val: Any) -> Optional[List[torch.dtype]]:
        if isinstance(val, (FakeTensor, torch.Tensor)):
            # If val is a single tensor
            return [val.dtype]
        elif isinstance(val, tuple) and all(
            isinstance(tensor, (FakeTensor, torch.Tensor)) for tensor in val
        ):
            # If val is a tuple of tensors
            dtypes = [fake_tensor.dtype for fake_tensor in val]
            return dtypes
        else:
            return None
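

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API above): shows
# one way an edge-dialect GraphModule can be turned into an FXOperatorGraph.
# The toy module, its input shape, and the export pipeline below are
# assumptions for demonstration; only FXOperatorGraph.gen_operator_graph is
# defined in this file.
if __name__ == "__main__":

    class _ToyModel(torch.nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.linear = torch.nn.Linear(4, 2)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.relu(self.linear(x))

    # Export to ATen, lower to edge dialect, then parse the edge GraphModule.
    exported = torch.export.export(_ToyModel(), (torch.randn(1, 4),))
    edge_gm = exir.to_edge(exported).exported_program().graph_module
    op_graph = FXOperatorGraph.gen_operator_graph(
        edge_gm, enable_module_hierarchy=True
    )
    print(op_graph)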