# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import gc

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor

from executorch.backends.qualcomm.utils.utils import (
    generate_qnn_executorch_option,
    update_spill_fill_size,
)


def preprocess_binary(ctx_bin, compiler_specs):
    """Wrap a raw QNN context binary into the binary-info blob built by
    ``QnnManager.MakeBinaryInfo`` and return it as ``bytes``.

    Args:
        ctx_bin: Raw context binary content (e.g. the bytes read from a shard
            file, as done in ``get_encoding`` below).
        compiler_specs: Forwarded to ``generate_qnn_executorch_option``.
            NOTE(review): ``get_encoding`` annotates this as ``str``, but it is
            only ever passed through to that helper — confirm the real type
            against ``generate_qnn_executorch_option``.

    Returns:
        bytes: The binary-info blob produced by the QNN manager.
    """
    qnn_mgr = PyQnnManagerAdaptor.QnnManager(
        generate_qnn_executorch_option(compiler_specs),
    )
    return bytes(qnn_mgr.MakeBinaryInfo(ctx_bin))


def get_encoding(
    path_to_shard: str,
    compiler_specs: str,
    get_input: bool,
    get_output: bool,
    num_input: int,
    num_output: int,
):
    """Extract per-tensor quantization encodings (scale / offset) from a QNN
    context binary shard.

    Args:
        path_to_shard: Path to the context binary shard file (opened "rb").
        compiler_specs: Forwarded to ``generate_qnn_executorch_option``.
            NOTE(review): the ``str`` annotation looks suspect — see the note
            on ``preprocess_binary``.
        get_input: If True, collect encodings for the first ``num_input``
            graph inputs.
        get_output: If True, collect encodings for the first ``num_output``
            graph outputs.
        num_input: Number of graph inputs to read encodings from.
        num_output: Number of graph outputs to read encodings from.

    Returns:
        list: Up to two dicts of the form ``{"scale": [...], "offset": [...]}``
        — the input encodings (when ``get_input``) followed by the output
        encodings (when ``get_output``). Empty if both flags are False.
    """
    encoding_list = []
    with open(path_to_shard, "rb") as f:
        ctx_bin = preprocess_binary(f.read(), compiler_specs)
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(
            generate_qnn_executorch_option(compiler_specs), ctx_bin
        )
        # Init() returns a status object; .value == 0 signals success.
        assert qnn_mgr.Init().value == 0, "failed to load context binary"
        # Only the first graph in the shard is inspected.
        graph_name = qnn_mgr.GetGraphNames()[0]
        # Tensors must be allocated before input/output metadata can be read.
        qnn_mgr.AllocateTensor(graph_name)
        if get_input:
            encoding_input = {"scale": [], "offset": []}
            for i in range(num_input):
                inputs = qnn_mgr.GetGraphInputs(graph_name)[i]
                encoding = inputs.GetEncodings()
                # .item() converts the single-element tensor/array to a Python scalar.
                encoding_input["scale"].append(encoding.data["scale"].item())
                encoding_input["offset"].append(encoding.data["offset"].item())
            encoding_list.append(encoding_input)
        if get_output:
            encoding_output = {"scale": [], "offset": []}
            for i in range(num_output):
                outputs = qnn_mgr.GetGraphOutputs(graph_name)[i]
                encoding = outputs.GetEncodings()
                encoding_output["scale"].append(encoding.data["scale"].item())
                encoding_output["offset"].append(encoding.data["offset"].item())
            encoding_list.append(encoding_output)
        # Release native resources held by the manager.
        qnn_mgr.Destroy()
    return encoding_list


def gen_pte_from_ctx_bin(artifact, pte_names, bundle_programs, backend_config):
    """Export one ``.pte`` file per name in ``pte_names`` from the edge
    program managers carried in ``bundle_programs``.

    Args:
        artifact: Output directory the ``.pte`` files are written into.
        pte_names: File stems; each produces ``{artifact}/{name}.pte``.
            Assumed to line up one-to-one with ``bundle_programs`` — TODO
            confirm with callers.
        bundle_programs: Dicts, each holding an ``"edge_program_manager"``
            entry. NOTE: consumed destructively (popped) by this function.
        backend_config: Config object passed to ``to_executorch``.

    Returns:
        list: Paths of the ``.pte`` files written, in generation order.
    """
    edge_prog_mgrs = [prog["edge_program_manager"] for prog in bundle_programs]
    # Setup spill-fill buffer for relieving runtime memory usage
    update_spill_fill_size(
        [
            prog_mgr._edge_programs[list(prog_mgr.methods)[0]]
            for prog_mgr in edge_prog_mgrs
        ]
    )
    # Export pte files
    pte_files = []
    for pte_name in pte_names:
        print(f"{pte_name} generating...")
        pte_files.append(f"{artifact}/{pte_name}.pte")
        with open(pte_files[-1], "wb") as f:
            # Always index 0: the head of the list is popped below each
            # iteration so the finished program can be garbage-collected.
            edge_prog_mgrs[0].to_executorch(config=backend_config).write_to_file(f)
        # GC for reducing host memory consuming
        bundle_programs.pop(0)
        edge_prog_mgrs.pop(0)
        gc.collect()

    return pte_files