# xref: /aosp_15_r20/external/executorch/examples/llm_manual/export_nanogpt.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2# All rights reserved.
3#
4# This source code is licensed under the BSD-style license found in the
5# LICENSE file in the root directory of this source tree.
6
7# export_nanogpt.py
8
9# Load partitioner for Xnnpack backend
10import torch
11from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
12
13# Model to be delegated to specific backend should use specific edge compile config
14from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
15from executorch.exir import to_edge
16
17from model import GPT
18from torch.export import export, export_for_training
19from torch.nn.attention import sdpa_kernel, SDPBackend
20
# Instantiate nanoGPT and load the published GPT-2 checkpoint as its weights.
nanogpt = GPT.from_pretrained("gpt2")  # use gpt2 weight as pretrained weight

# One example batch of token ids — the concrete input used for tracing.
sample_tokens = torch.randint(
    0, 100, (1, nanogpt.config.block_size), dtype=torch.long
)
tracing_inputs = (sample_tokens,)

# Allow the sequence (token) dimension to vary, up to the model's block size.
token_dim = torch.export.Dim("token_dim", max=nanogpt.config.block_size)
tracing_dynamic_shapes = ({1: token_dim},)

# Trace the model into a portable intermediate representation.
# torch.no_grad() excludes training-specific logic; the MATH SDPA backend
# keeps scaled-dot-product attention in a form the exporter can decompose.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
    pretrained_module = export_for_training(
        nanogpt, tracing_inputs, dynamic_shapes=tracing_dynamic_shapes
    ).module()
    exported_program = export(
        pretrained_module, tracing_inputs, dynamic_shapes=tracing_dynamic_shapes
    )

# Convert the traced program into a runnable ExecuTorch program. Lowering to
# the XNNPACK backend requires the XNNPACK-specific edge compile config.
program_manager = to_edge(
    exported_program, compile_config=get_xnnpack_edge_compile_config()
)

# Delegate the supported subgraphs to XNNPACK via its partitioner, then
# finalize into an ExecuTorch program.
program_manager = program_manager.to_backend(XnnpackPartitioner())
executorch_program = program_manager.to_executorch()

# Serialize the XNNPACK-delegated ExecuTorch program to disk.
with open("nanogpt.pte", "wb") as outfile:
    outfile.write(executorch_program.buffer)
47