# Copyright 2023-2024 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

#
# Main implementation of AoT flow to partition and preprocess for Arm target
# backends. Converts via TOSA as an intermediate form supported by AoT and
# JIT compiler flows.
#

import logging
import os
from typing import final, List, Optional

import serializer.tosa_serializer as ts
from executorch.backends.arm.arm_vela import vela_compile
from executorch.backends.arm.operators.node_visitor import get_node_visitors
from executorch.backends.arm.tosa_specification import TosaSpecification

from executorch.backends.arm._passes.arm_pass_manager import (
    ArmPassManager,
)  # usort: skip
from executorch.backends.arm.process_node import (
    process_call_function,
    process_output,
    process_placeholder,
)
from executorch.backends.arm.tosa_utils import dbg_fail, dbg_tosa_dump
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
from executorch.exir.backend.compile_spec_schema import CompileSpec
from torch.export.exported_program import ExportedProgram

# TOSA backend debug functionality
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
# Setting the environment variable TOSA_DBG_VERBOSE=1 raises this module's
# log level to INFO.
TOSA_DBG_VERBOSE = os.environ.get("TOSA_DBG_VERBOSE") == "1"
if TOSA_DBG_VERBOSE:
    logging.basicConfig(level=logging.INFO)
    logger.setLevel(logging.INFO)


class ArmCompileSpecBuilder:
    """
    Fluent builder for the list of CompileSpec objects consumed by ArmBackend.

    Select exactly one output format with ethosu_compile_spec() or
    tosa_compile_spec(), optionally chain the other setters, then call
    build() to obtain the compile spec list.
    """

    def __init__(self):
        self.compile_spec: List[CompileSpec] = []
        self.compiler_flags: List[str] = []
        self.output_format: Optional[str] = None
        self.path_for_intermediates: Optional[str] = None
        # TODO MLETORCH-265 Remove permute_nhwc flag
        self.permute_nhwc = False
        self.quantize_io = False
        self.tosa_version: Optional[TosaSpecification] = None

    def ethosu_compile_spec(
        self,
        config: str,
        system_config: str,
        memory_mode: str,
        extra_flags: Optional[str] = None,
        config_ini: Optional[str] = "Arm/vela.ini",
    ) -> "ArmCompileSpecBuilder":
        """
        Generate compile spec for Ethos-U NPU

        Args:
            config: Ethos-U accelerator configuration, e.g. ethos-u55-128
            system_config: System configuration to select from the Vela
                configuration file
            memory_mode: Memory mode to select from the Vela configuration file
            extra_flags: Extra flags for the Vela compiler
            config_ini: Vela configuration file(s) in Python ConfigParser .ini
                file format

        Returns:
            This builder, to allow call chaining.

        Raises:
            AssertionError: If an output format has already been selected.
        """
        assert (
            self.output_format is None
        ), f"Output format already set to {self.output_format}"
        self.output_format = "vela"
        self.compiler_flags = [
            f"--accelerator-config={config}",
            f"--config={config_ini}",
        ]
        if system_config is not None:
            self.compiler_flags.append(f"--system-config={system_config}")
        if memory_mode is not None:
            self.compiler_flags.append(f"--memory-mode={memory_mode}")
        if extra_flags is not None:
            self.compiler_flags.append(extra_flags)

        base_tosa_version = "TOSA-0.80.0+BI"
        # Accelerator configs are normally written in lower case (e.g.
        # "ethos-u55-128"), so match case-insensitively; a case-sensitive
        # "U55" test would never detect the documented spelling.
        if "u55" in config.lower():
            # Add the Ethos-U55 extension marker
            base_tosa_version += "+u55"
        self.tosa_version = TosaSpecification.create_from_string(base_tosa_version)

        return self

    def tosa_compile_spec(self, tosa_version: str) -> "ArmCompileSpecBuilder":
        """
        Generate compile spec for TOSA flatbuffer output

        Args:
            tosa_version: TOSA specification string, parsed with
                TosaSpecification.create_from_string

        Returns:
            This builder, to allow call chaining.

        Raises:
            AssertionError: If an output format has already been selected.
        """
        assert (
            self.output_format is None
        ), f"Output format already set: {self.output_format}"
        self.output_format = "tosa"
        self.tosa_version = TosaSpecification.create_from_string(tosa_version)
        return self

    def dump_intermediate_artifacts_to(
        self, output_path: str
    ) -> "ArmCompileSpecBuilder":
        """
        Sets a path for dumping intermediate results, such as tosa and pte.

        The path is emitted by build() under the "debug_artifact_path" key.
        """
        self.path_for_intermediates = output_path
        return self

    def set_permute_memory_format(
        self, set_nhwc_permutation: bool = True
    ) -> "ArmCompileSpecBuilder":
        """
        Permute to channel last in compiler and runtime. Compilation and
        runtime will convert rank 4 inputs to channel last for each sub-graph.
        """
        self.permute_nhwc = set_nhwc_permutation
        return self

    def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
        """
        Quantization of inputs and dequantization of outputs for cases where
        whole graph is quantized and method signature is not of quantized type.
        """
        self.quantize_io = quantize_io
        return self

    def build(self) -> List[CompileSpec]:
        """
        Generate a list of compile spec objects from the builder

        Returns:
            The compile spec list; it is also stored on self.compile_spec.

        Raises:
            AssertionError: If no TOSA version has been set, i.e. neither
                ethosu_compile_spec() nor tosa_compile_spec() was called.
        """
        assert self.tosa_version, (
            "No TOSA version set; call ethosu_compile_spec() or "
            "tosa_compile_spec() before build()"
        )

        # Always supply a TOSA version
        self.compile_spec = [
            CompileSpec("tosa_version", str(self.tosa_version).encode())
        ]

        if self.output_format == "vela":
            self.compile_spec += [
                CompileSpec("output_format", "vela".encode()),
                CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
            ]
        elif self.output_format == "tosa":
            self.compile_spec.append(CompileSpec("output_format", "tosa".encode()))

        if self.path_for_intermediates is not None:
            self.compile_spec.append(
                CompileSpec("debug_artifact_path", self.path_for_intermediates.encode())
            )

        if self.permute_nhwc:
            self.compile_spec.append(
                CompileSpec("permute_memory_format", "nhwc".encode())
            )

        if self.quantize_io:
            self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))

        return self.compile_spec


def is_permute_memory(compile_spec: List[CompileSpec]) -> bool:
    """
    Return True if the first "permute_memory_format" entry in compile_spec
    decodes to "nhwc"; False if it has another value or no such entry exists.
    """
    for spec in compile_spec:
        if spec.key == "permute_memory_format":
            return spec.value.decode() == "nhwc"
    return False


def is_tosa(compile_spec: List[CompileSpec]) -> bool:
    """
    Return True if the first "output_format" entry in compile_spec decodes to
    "tosa"; False if it has another value or no such entry exists.
    """
    for spec in compile_spec:
        if spec.key == "output_format":
            return spec.value.decode() == "tosa"
    return False


def get_intermediate_path(compile_spec: List[CompileSpec]) -> Optional[str]:
    """
    Return the decoded value of the first "debug_artifact_path" entry in
    compile_spec, or None if there is no such entry.
    """
    for spec in compile_spec:
        if spec.key == "debug_artifact_path":
            return spec.value.decode()
    return None


def _get_first_delegation_tag(graph_module) -> Optional[str]:
    """Get the first delegation tag from the graph_module or return None."""
    for node in graph_module.graph.nodes:
        tag = node.meta.get("delegation_tag")
        if tag:
            return tag

    logger.debug("No delegation tag found in partition.")
    return None


@final
class ArmBackend(BackendDetails):
    """Backend that lowers an ExportedProgram to TOSA and, optionally, Vela."""

    @staticmethod
    def preprocess(  # noqa: C901
        edge_program: ExportedProgram,
        compile_spec: List[CompileSpec],
    ) -> PreprocessResult:
        """
        Convert edge_program to a TOSA graph and serialize it.

        Args:
            edge_program: The exported program (partition) to lower.
            compile_spec: Compile specs; read here for the keys
                "output_format", "compile_flags" and "debug_artifact_path",
                and also passed on to TosaSpecification and ArmPassManager.

        Returns:
            A PreprocessResult whose processed_bytes is the vela_compile
            output for "vela", or the serialized TOSA flatbuffer for "tosa".

        Raises:
            RuntimeError: If no output format is given, if the format is
                "vela" but no compile flags are present, or if the output
                format is neither "vela" nor "tosa".
            AssertionError: If no TOSA specification could be created from
                compile_spec.
        """
        logger.info("ArmBackend::preprocess")

        # if a debug/test build capture output files from TOSA stage
        artifact_path = None
        output_format = ""
        compile_flags = []
        for spec in compile_spec:
            if spec.key == "debug_artifact_path":
                artifact_path = spec.value.decode()
            if spec.key == "output_format":
                output_format = spec.value.decode()
            if spec.key == "compile_flags":
                compile_flags.append(spec.value.decode())

        # Check that the output format is set in the compile spec
        if not output_format:
            raise RuntimeError("output format is required")

        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
        assert (
            tosa_spec is not None
        ), "TOSA backend needs a TOSA version specified in the CompileSpec!"

        if output_format == "vela" and len(compile_flags) == 0:
            # Not testing for compile_flags correctness here, just that they are
            # present. The compiler will give errors if they are not valid.
            raise RuntimeError("compile flags are required for vela output format")

        logger.info(f"Converting ExportedProgram to TOSA: {tosa_spec}")

        # Converted output for this subgraph, serializer needs path early as it emits
        # const data directly. Path created and data written only in debug builds.
        tosa_graph = ts.TosaSerializer(artifact_path)
        graph_module = ArmPassManager().transform_to_backend_pipeline(
            exported_program=edge_program, compile_spec=compile_spec
        )

        node_visitors = get_node_visitors(edge_program, tosa_spec)

        for node in graph_module.graph.nodes:
            if node.op == "call_function":
                process_call_function(node, tosa_graph, node_visitors, tosa_spec)
            elif node.op == "placeholder":
                process_placeholder(node, tosa_graph, edge_program, tosa_spec)
            elif node.op == "output":
                process_output(node, tosa_graph)
            else:
                # This will only happen if an unpartitioned graph is passed without
                # any checking of compatibility.
                dbg_fail(node, tosa_graph, artifact_path)

        # TODO: It would be awesome if this dump could somehow be done on top level and not here.
        # Problem is that the desc.json has to be created on the tosa_graph object, which we can't
        # access from top level.
        if artifact_path:
            tag = _get_first_delegation_tag(graph_module)
            dbg_tosa_dump(
                tosa_graph,
                artifact_path,
                suffix=f"_{tag}" if tag else "",
            )

        # Serialize and return the program. While we have always produced TOSA
        # output as an intermediate, some flows compile to device binaries in
        # preprocess and some consume TOSA fb directly.
        if output_format == "vela":
            # Emit vela_bin_stream format
            binary = vela_compile(tosa_graph, compile_flags)
        elif output_format == "tosa":
            # Emit TOSA flatbuffer
            binary = bytes(tosa_graph.serialize())
        else:
            raise RuntimeError(f"Unknown format {output_format}")

        # Continuing from above. Can I put tosa_graph into this function?
        # debug_handle_map = ...
        return PreprocessResult(processed_bytes=binary)