# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

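# Example invocations, mirroring the subcommand help strings in main() below
# (paths, SoC model, and serial numbers are placeholders):
#   python export.py compile -a model.bin -m SM8650 -s $SERIAL_NO -b /path/to/build-android
#   python export.py execute -p output_pte/model -i inp.raw -s $SERIAL_NO -b /path/to/build-android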
import argparse
import io
import json
import logging
import os
from pathlib import Path

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor
import numpy as np

import torch
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.utils.utils import (
    draw_graph,
    ExecutorchBackendConfig,
    from_context_binary,
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
    generate_qnn_executorch_option,
)
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass


def get_logger():
    logger = logging.getLogger("aihub.utils.export")
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="[%(asctime)s %(prefix)s] %(levelname)-8s: %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.propagate = False
    return logging.LoggerAdapter(logger, extra={"prefix": "UTILS.EXPORT"})

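# With the formatter configured in get_logger() above, log lines look roughly
# like the following (the timestamp is illustrative):
#   [2024-01-01 12:00:00 UTILS.EXPORT] INFO    : prepare compiler spec for qualcomm backend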

def get_io_info(prog_info, ctx_bin_path, compiler_specs):
    def fill_tensor_info(info, qnn_tensors, category):
        # fetch related IO info stored in prog_info
        for i, (name, tensor) in enumerate(prog_info[category].items()):
            assert qnn_tensors[i].GetName() == name, "tensor name mismatch"
            encoding = qnn_tensors[i].GetEncodings()
            quantization_info = {
                "scale": encoding.data["scale"].tolist(),
                "offset": encoding.data["offset"].tolist(),
                "axis": encoding.axis,
            }
            info[category].append(
                {
                    "name": name,
                    "shape": tuple(tensor.shape),
                    "dtype": str(tensor.dtype),
                    "encoding": quantization_info,
                }
            )

    # dictionary to be serialized into json format
    in_key, out_key = "inputs", "outputs"
    tensor_info = {in_key: [], out_key: []}

    with open(ctx_bin_path, "rb") as f:
        ctx_bin = preprocess_binary(f.read(), compiler_specs)
        # leverage QNN pybind interface to retrieve tensor encodings
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(
            generate_qnn_executorch_option(compiler_specs), ctx_bin
        )
        assert qnn_mgr.Init().value == 0, "failed to load context binary"
        graph_name = qnn_mgr.GetGraphNames()[0]
        qnn_mgr.AllocateTensor(graph_name)
        fill_tensor_info(tensor_info, qnn_mgr.GetGraphInputs(graph_name), in_key)
        fill_tensor_info(tensor_info, qnn_mgr.GetGraphOutputs(graph_name), out_key)
        qnn_mgr.Destroy()

    return tensor_info

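# For reference, the graph description produced by get_io_info() is serialized
# to JSON roughly as sketched below (names, shapes, and dtypes are placeholders;
# "soc_model" is appended later by compile()):
# {
#   "inputs": [
#     {"name": "...", "shape": [1, 3, 224, 224], "dtype": "torch.uint8",
#      "encoding": {"scale": [...], "offset": [...], "axis": 0}}
#   ],
#   "outputs": [{...}],
#   "soc_model": "SM8650"
# }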

def get_ones_tensor(tensor_info, logger):
    logger.warning(
        f"tensor '{tensor_info['name']}' is filled with ones, "
        "unexpected outputs might be generated"
    )
    return torch.ones(tensor_info["shape"], dtype=eval(tensor_info["dtype"]))


def get_tensor_with_encoding(tensor, tensor_info, logger):
    scale = tensor_info["encoding"]["scale"]
    offset = tensor_info["encoding"]["offset"]

    # the user gave a wrong tensor since no encoding is present
    if len(scale) == 0:
        logger.error(f"tensor '{tensor_info['name']}' has no encoding")
        return get_ones_tensor(tensor_info, logger)

    # quantize if the tensor is float, otherwise dequantize with the encoding
    return (
        tensor.div(scale).add(offset).round().to(eval(tensor_info["dtype"]))
        if tensor.dtype == torch.float
        else tensor.sub(offset).mul(scale).to(torch.float32)
    )

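# For reference, the affine (de)quantization performed in get_tensor_with_encoding
# above (a sketch with made-up numbers): a float value x with scale s and offset o
# is quantized as round(x / s + o), and an integer value q is dequantized as
# (q - o) * s, e.g. x = 1.27, s = 0.01, o = 0 -> q = 127.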

def get_tensor(io_info, tensors, logger, checking_output=False):
    # check if enough tensors have been given
    if len(tensors) != len(io_info):
        logger.error(
            "mismatched number of given tensors, "
            f"expected {len(io_info)} but got {len(tensors)}"
        )
        if checking_output:
            logger.error(
                "output tensors failed to generate, "
                "please check executor_runner logs."
            )
            exit(-1)

        return [get_ones_tensor(t, logger) for t in io_info]

    # list of tensors to be returned
    ret_tensors, ret_list = [], []
    for i, info in enumerate(io_info):
        ret_list.append(f"input_0_{i}.raw")
        if list(tensors[i].shape) != info["shape"]:
            logger.error(
                f"tensor '{info['name']}' shape mismatch: "
                f"given > {tensors[i].shape} - "
                f"required > {info['shape']}"
            )
            ret_tensors.append(get_ones_tensor(info, logger))
            continue

        ret_tensors.append(
            tensors[i]
            if tensors[i].dtype == eval(info["dtype"])
            # try quant / dequant for the given tensor if possible
            else get_tensor_with_encoding(tensors[i], info, logger)
        )
    return [ret_tensors], " ".join(ret_list)


def to_context_binary(
    model_lib, soc_model, device, host, build_folder, output_folder, logger
):
    ext = Path(model_lib).suffix
    if ext == ".bin":
        return model_lib

    assert (
        device is not None
    ), "Please assign a device serial for model library conversion."
    logger.info(f"Generating context binary for {model_lib}")
    # leverage SimpleADB for model library conversion
    lib_name = Path(model_lib).stem
    sdk_root = os.getenv("QNN_SDK_ROOT")
    adb = SimpleADB(
        qnn_sdk=sdk_root,
        build_path=build_folder,
        pte_path=model_lib,
        workspace=f"/data/local/tmp/executorch/{lib_name}",
        device_id=device,
        soc_model=soc_model,
        host_id=host,
    )

    logger.info("pushing QNN libraries & conversion tool")
    arch = adb.arch_table[soc_model]
    files = [
        f"{sdk_root}/bin/aarch64-android/qnn-context-binary-generator",
        f"{sdk_root}/lib/aarch64-android/libQnnHtp.so",
        f"{sdk_root}/lib/aarch64-android/libQnnHtpV{arch}Stub.so",
        f"{sdk_root}/lib/aarch64-android/libQnnHtpPrepare.so",
        f"{sdk_root}/lib/hexagon-v{arch}/unsigned/libQnnHtpV{arch}Skel.so",
    ]
    adb.push(files=files)

    logger.info("starting conversion")
    commands = " ".join(
        [
            f"cd {adb.workspace} &&",
            "export LD_LIBRARY_PATH=. &&",
            "./qnn-context-binary-generator",
            f"--model {Path(model_lib).name}",
            "--backend libQnnHtp.so",
            f"--binary_file {lib_name}",
        ]
    )
    adb.execute(custom_runner_cmd=commands)

    logger.info(f"collecting converted context binary - {lib_name}.bin")
    adb._adb(["pull", f"{adb.workspace}/output/{lib_name}.bin", output_folder])

    bin_path = f"{output_folder}/{lib_name}.bin"
    assert os.path.exists(
        bin_path
    ), "Failed to convert context binary, please check logcat for more details."
    return bin_path

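# The conversion in to_context_binary() above amounts to running the following
# inside adb.workspace on the device (a sketch; <model_lib> is the pushed library
# file name and <lib_name> its stem):
#   export LD_LIBRARY_PATH=. && ./qnn-context-binary-generator \
#       --model <model_lib> --backend libQnnHtp.so --binary_file <lib_name>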

def compile(args):
    logger = get_logger()
    logger.info("prepare compiler spec for qualcomm backend")

    # setup compiler spec dedicated to QNN HTP backend
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    # setup general compiler spec for QNN
    compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=getattr(QcomChipset, args.model),
        backend_options=backend_options,
        is_from_context_binary=True,
    )
    # setup memory planning
    memory_planning_pass = MemoryPlanningPass(
        alloc_graph_input=args.allocate_graph_io,
        alloc_graph_output=args.allocate_graph_io,
    )

    # dictionary for avoiding name collisions when creating custom ops
    name_map = {}
    num_bins = len(args.artifacts)
    for i, ctx_bin in enumerate(args.artifacts):
        index = i + 1
        binary_name = Path(ctx_bin).stem
        output_dir = f"{args.output_pte_folder}/{binary_name}"
        make_output_dir(output_dir)
        # convert model library into context binary if required
        ctx_bin = to_context_binary(
            model_lib=ctx_bin,
            soc_model=args.model,
            device=args.device,
            host=args.host,
            build_folder=args.build_folder,
            output_folder=output_dir,
            logger=logger,
        )
        # step 0: check if a name collision happens for context binaries
        logger.info(f"({index}/{num_bins}) checking custom op name of {ctx_bin}")
        custom_op_name = f"ctx_loader_{binary_name}"
        postfix = name_map.get(custom_op_name, 0)
        name_map[custom_op_name] = postfix + 1
        if postfix > 0:
            custom_op_name = f"{custom_op_name}_{postfix}"
        # step 1: generate ExportedProgram with custom op as binary loader & lower to QnnBackend
        logger.info(f"({index}/{num_bins}) exporting program for {ctx_bin}")
        prog_info = from_context_binary(
            ctx_bin, custom_op_name, getattr(QcomChipset, args.model)
        )
        # step 2: write pte files and IO information
        logger.info(f"({index}/{num_bins}) exporting {binary_name}.pte")
        with open(f"{output_dir}/{binary_name}.pte", "wb") as f:
            prog_info["edge_program_manager"].to_executorch(
                config=ExecutorchBackendConfig(
                    memory_planning_pass=memory_planning_pass
                )
            ).write_to_file(f)

        logger.info(
            f"({index}/{num_bins}) exporting network graph with {binary_name}.svg"
        )
        draw_graph(binary_name, output_dir, prog_info["exported_program"].graph_module)
        logger.info(
            f"({index}/{num_bins}) exporting graph description with {binary_name}.json"
        )
        with open(f"{output_dir}/{binary_name}.json", "w") as f:
            graph_info = get_io_info(prog_info, ctx_bin, compiler_specs)
            graph_info["soc_model"] = args.model
            json.dump(graph_info, f, indent=2)

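# A minimal sketch of preparing an input file for the execute subcommand below
# (the shape and dtype are placeholders and must match the graph description;
# the resulting file can then be passed via '-i inp.raw'):
#   import torch
#   torch.save(torch.ones(1, 3, 224, 224, dtype=torch.uint8), "inp.raw")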

def execute(args):
    logger = get_logger()

    # load graph description file
    pte_name = Path(args.pte_directory).stem
    graph_desc = f"{args.pte_directory}/{pte_name}.json"
    logger.info(f"loading graph description: {graph_desc}")
    with open(graph_desc, "r") as f:
        graph_info = json.load(f)

    # load input files
    logger.info("loading user inputs")
    user_inputs = []
    for input_file in args.input_files or []:
        with open(input_file, "rb") as f:
            buffer = io.BytesIO(f.read())
            user_inputs.append(torch.load(buffer, weights_only=False))

    # check if inputs are valid, fall back to ones tensors otherwise
    logger.info("generating input data")
    inputs, input_list = get_tensor(graph_info["inputs"], user_inputs, logger)

    logger.info("preparing ADB connection")
    # leverage SimpleADB for e2e inference
    adb = SimpleADB(
        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
        build_path=args.build_folder,
        pte_path=f"{args.pte_directory}/{pte_name}.pte",
        workspace=f"/data/local/tmp/executorch/{pte_name}",
        device_id=args.device,
        soc_model=graph_info["soc_model"],
        host_id=args.host,
        shared_buffer=args.shared_buffer,
    )

    logger.info("pushing QNN libraries & other artifacts")
    adb.push(inputs=inputs, input_list=input_list)

    logger.info("starting inference")
    adb.execute()

    logger.info("collecting output data")

    def post_process():
        output_info, outputs = graph_info["outputs"], []
        output_folder = f"{args.output_data_folder}/outputs"
        for i, f in enumerate(sorted(os.listdir(output_folder))):
            filename = os.path.join(output_folder, f)
            output = np.fromfile(
                filename, dtype=eval(f"np.{output_info[i]['dtype'].split('.')[-1]}")
            )
            outputs.append(torch.from_numpy(output.reshape(output_info[i]["shape"])))
            os.remove(filename)

        os.rmdir(output_folder)
        outputs, _ = get_tensor(output_info, outputs, logger, checking_output=True)
        # the dataset length equals 1, so only the first entry holds results
        for i, output in enumerate(outputs[0]):
            torch.save(output, f"{args.output_data_folder}/{output_info[i]['name']}.pt")

    make_output_dir(args.output_data_folder)
    adb.pull(args.output_data_folder, post_process)
    logger.info(
        f"execution finished, please check {args.output_data_folder} for results"
    )

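# Outputs of execute() are written as <output_name>.pt files under the output
# data folder and can be inspected with torch.load, e.g. (the actual file name
# depends on the graph's output tensor names):
#   result = torch.load("output_data/output_0.pt")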

def main():
    parser = argparse.ArgumentParser(
        description=(
            "Utility to lower precompiled model libraries / "
            "context binaries from Qualcomm AI Engine Direct to an ExecuTorch"
            " .pte program. Please visit https://aihub.qualcomm.com/ to "
            "download your favorite models."
        ),
    )
    subparsers = parser.add_subparsers(
        title="subcommands",
        description=(
            "[compile]: Compile designated model libraries / "
            "context binaries into .pte files. "
            "[execute]: Perform on-device inference with a given .pte."
        ),
    )

    sub_compile = subparsers.add_parser(
        name="compile",
        help=(
            "e.g. python export.py compile -a model.bin -m SM8650 "
            "-b /path/to/build-android"
        ),
    )
    sub_compile.add_argument(
        "-a",
        "--artifacts",
        nargs="+",
        type=str,
        required=True,
        help=(
            "Path to artifacts generated by AI HUB or the QNN tools; "
            "batch processing is supported. "
            "e.g. python export.py compile -a a.bin b.so c.bin "
            "-m SM8650 -s $SERIAL_NO -b /path/to/build-android"
        ),
    )
    sub_compile.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="SoC model. e.g. SM8650",
    )
    sub_compile.add_argument(
        "-s",
        "--device",
        type=str,
        help="Serial number of the device, which can be obtained via 'adb devices'.",
    )
    sub_compile.add_argument(
        "-o",
        "--output_pte_folder",
        type=str,
        default="./output_pte",
        help=(
            "Path to output artifacts, stored in 'output_pte' if not given. "
            "Graph descriptions & diagrams will also be exported."
        ),
    )
    sub_compile.add_argument(
        "-b",
        "--build_folder",
        help="Path to cmake binary directory for android, e.g., /path/to/build-android",
        type=str,
        required=True,
    )
    sub_compile.add_argument(
        "-l",
        "--allocate_graph_io",
        # argparse's bool() treats any non-empty string (including "False") as
        # True, so parse the flag explicitly to make '--allocate_graph_io False' work
        type=lambda arg: arg.lower() in ("true", "1"),
        default=True,
        help=(
            "True if IO tensors are pre-allocated by the framework. "
            "False for users who want to manage resources at runtime."
        ),
    )
    sub_compile.add_argument(
        "-H",
        "--host",
        type=str,
        help="Gateway hostname.",
    )
    sub_compile.set_defaults(callback=compile)

    sub_execute = subparsers.add_parser(
        name="execute",
        help=(
            "e.g. python export.py execute -p model_dir -i inp.raw -s device_serial"
        ),
    )
    sub_execute.add_argument(
        "-p",
        "--pte_directory",
        type=str,
        required=True,
        help="Path to the .pte file folder generated by the 'compile' subcommand.",
    )
    sub_execute.add_argument(
        "-i",
        "--input_files",
        nargs="*",
        type=str,
        help=(
            "Path to input files stored via torch.save. "
            "If the number / spec of input files doesn't match the given .pte file, "
            "tensors filled with the value 1 will be taken as inputs."
        ),
    )
    sub_execute.add_argument(
        "-s",
        "--device",
        type=str,
        required=True,
        help="Serial number of the device, which can be obtained via 'adb devices'.",
    )
    sub_execute.add_argument(
        "-o",
        "--output_data_folder",
        type=str,
        default="./output_data",
        help="Path to output data, stored in 'output_data' if not given.",
    )
    sub_execute.add_argument(
        "-b",
        "--build_folder",
        help="Path to cmake binary directory for android, e.g., /path/to/build-android",
        type=str,
        required=True,
    )
    sub_execute.add_argument(
        "-z",
        "--shared_buffer",
        help=(
            "Enables usage of a shared buffer between application and backend for graph I/O."
            " Please use with `--allocate_graph_io False` in the compile command."
        ),
        action="store_true",
    )
    sub_execute.add_argument(
        "-H",
        "--host",
        type=str,
        help="Gateway hostname.",
    )
    sub_execute.set_defaults(callback=execute)

    args = parser.parse_args()
    args.callback(args)


if __name__ == "__main__":
    main()