# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import io
import json
import logging
import os
from pathlib import Path

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor
import numpy as np

import torch
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.utils.utils import (
    draw_graph,
    ExecutorchBackendConfig,
    from_context_binary,
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
    generate_qnn_executorch_option,
)
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
from executorch.examples.qualcomm.utils import make_output_dir, SimpleADB
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass


def get_logger():
    logger = logging.getLogger("aihub.utils.export")
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="[%(asctime)s %(prefix)s] %(levelname)-8s: %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.propagate = False
    return logging.LoggerAdapter(logger, extra={"prefix": "UTILS.EXPORT"})


def get_io_info(prog_info, ctx_bin_path, compiler_specs):
    def fill_tensor_info(info, qnn_tensors, category):
        # fetch related IO info stored in prog_info
        for i, (name, tensor) in enumerate(prog_info[category].items()):
            assert qnn_tensors[i].GetName() == name, "tensor name mismatch"
            encoding = qnn_tensors[i].GetEncodings()
            quantization_info = {
                "scale": encoding.data["scale"].tolist(),
                "offset": encoding.data["offset"].tolist(),
                "axis": encoding.axis,
            }
            info[category].append(
                {
                    "name": name,
                    "shape": tuple(tensor.shape),
                    "dtype": str(tensor.dtype),
                    "encoding": quantization_info,
                }
            )

    # dictionary to be serialized into json format
    in_key, out_key = "inputs", "outputs"
    tensor_info = {in_key: [], out_key: []}

    with open(ctx_bin_path, "rb") as f:
        ctx_bin = preprocess_binary(f.read(), compiler_specs)
        # leverage QNN pybind interface to retrieve tensor encodings
        qnn_mgr = PyQnnManagerAdaptor.QnnManager(
            generate_qnn_executorch_option(compiler_specs), ctx_bin
        )
        assert qnn_mgr.Init().value == 0, "failed to load context binary"
        graph_name = qnn_mgr.GetGraphNames()[0]
        qnn_mgr.AllocateTensor(graph_name)
        fill_tensor_info(tensor_info, qnn_mgr.GetGraphInputs(graph_name), in_key)
        fill_tensor_info(tensor_info, qnn_mgr.GetGraphOutputs(graph_name), out_key)
        qnn_mgr.Destroy()

    return tensor_info
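
# For reference, the graph description returned above (and written to
# <binary_name>.json by the `compile` subcommand, which adds a "soc_model"
# field) has roughly the following layout; the concrete names, shapes, dtypes
# and encodings are placeholders that depend on the context binary:
#
#   {
#     "inputs": [
#       {"name": "...", "shape": [...], "dtype": "torch.float32",
#        "encoding": {"scale": [...], "offset": [...], "axis": ...}}
#     ],
#     "outputs": [...],          # same entry layout as "inputs"
#     "soc_model": "SM8650"
#   }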


def get_ones_tensor(tensor_info, logger):
    logger.warning(
        f"tensor '{tensor_info['name']}' falls back to a ones tensor, "
        "unexpected outputs might be generated"
    )
    return torch.ones(tensor_info["shape"], dtype=eval(tensor_info["dtype"]))


def get_tensor_with_encoding(tensor, tensor_info, logger):
    scale = tensor_info["encoding"]["scale"]
    offset = tensor_info["encoding"]["offset"]

    # user gave a wrong tensor since no encoding appears
    if len(scale) == 0:
        logger.error(f"tensor '{tensor_info['name']}' has no encoding")
        return get_ones_tensor(tensor_info, logger)

    # quantize if the given tensor is float, otherwise dequantize back to float
    return (
        tensor.div(scale).add(offset).round().to(eval(tensor_info["dtype"]))
        if tensor.dtype == torch.float
        else tensor.sub(offset).mul(scale).to(torch.float32)
    )
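

# Encoding convention used by get_tensor_with_encoding above (read directly
# from the tensor ops): quantize as q = round(f / scale + offset), and
# dequantize as f = (q - offset) * scale.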


def get_tensor(io_info, tensors, logger, checking_output=False):
    # check if enough tensors have been given
    if len(tensors) != len(io_info):
        logger.error(
            "mismatched number of given tensors, "
            f"expected {len(io_info)} but got {len(tensors)}"
        )
        if checking_output:
            logger.error(
                "output tensors failed to generate, "
                "please check executor_runner logs."
            )
            exit(-1)

        return [get_ones_tensor(t, logger) for t in io_info]

    # list of tensors to be returned
    ret_tensors, ret_list = [], []
    for i, info in enumerate(io_info):
        ret_list.append(f"input_0_{i}.raw")
        if list(tensors[i].shape) != info["shape"]:
            logger.error(
                f"tensor '{info['name']}' shape mismatch: "
                f"users > {tensors[i].shape} - "
                f"required > {info['shape']}"
            )
            ret_tensors.append(get_ones_tensor(info, logger))
            continue

        ret_tensors.append(
            tensors[i]
            if tensors[i].dtype == eval(info["dtype"])
            # try quant / dequant for the given tensor if possible
            else get_tensor_with_encoding(tensors[i], info, logger)
        )
    return [ret_tensors], " ".join(ret_list)


def to_context_binary(
    model_lib, soc_model, device, host, build_folder, output_folder, logger
):
    ext = Path(model_lib).suffix
    if ext == ".bin":
        return model_lib

    assert (
        device is not None
    ), "Please assign a device serial for model library conversion."
    logger.info(f"Generating context binary for {model_lib}")
    # leverage SimpleADB for model library conversion
    lib_name = Path(model_lib).stem
    sdk_root = os.getenv("QNN_SDK_ROOT")
    adb = SimpleADB(
        qnn_sdk=sdk_root,
        build_path=build_folder,
        pte_path=model_lib,
        workspace=f"/data/local/tmp/executorch/{lib_name}",
        device_id=device,
        soc_model=soc_model,
        host_id=host,
    )

    logger.info("pushing QNN libraries & tool")
    arch = adb.arch_table[soc_model]
    files = [
        f"{sdk_root}/bin/aarch64-android/qnn-context-binary-generator",
        f"{sdk_root}/lib/aarch64-android/libQnnHtp.so",
        f"{sdk_root}/lib/aarch64-android/libQnnHtpV{arch}Stub.so",
        f"{sdk_root}/lib/aarch64-android/libQnnHtpPrepare.so",
        f"{sdk_root}/lib/hexagon-v{arch}/unsigned/libQnnHtpV{arch}Skel.so",
    ]
    adb.push(files=files)

    logger.info("starting conversion")
    commands = " ".join(
        [
            f"cd {adb.workspace} &&",
            "export LD_LIBRARY_PATH=. &&",
            "./qnn-context-binary-generator",
            f"--model {Path(model_lib).name}",
            "--backend libQnnHtp.so",
            f"--binary_file {lib_name}",
        ]
    )
    adb.execute(custom_runner_cmd=commands)

    logger.info(f"collecting converted context binary - {lib_name}.bin")
    adb._adb(["pull", f"{adb.workspace}/output/{lib_name}.bin", output_folder])

    bin_path = f"{output_folder}/{lib_name}.bin"
    assert os.path.exists(bin_path), (
        "Failed to convert context binary, please check logcat for more details."
    )
    return bin_path


def compile(args):
    logger = get_logger()
    logger.info("prepare compiler spec for qualcomm backend")

    # setup compiler spec dedicated to QNN HTP backend
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    # setup general compiler spec for QNN
    compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=getattr(QcomChipset, args.model),
        backend_options=backend_options,
        is_from_context_binary=True,
    )
    # setup memory planning
    memory_planning_pass = MemoryPlanningPass(
        alloc_graph_input=args.allocate_graph_io,
        alloc_graph_output=args.allocate_graph_io,
    )

    # dictionary for avoiding name collision when creating custom ops
    name_map = {}
    num_bins = len(args.artifacts)
    for i, ctx_bin in enumerate(args.artifacts):
        index = i + 1
        binary_name = Path(ctx_bin).stem
        output_dir = f"{args.output_pte_folder}/{binary_name}"
        make_output_dir(output_dir)
        # convert model library into context binary if required
        ctx_bin = to_context_binary(
            model_lib=ctx_bin,
            soc_model=args.model,
            device=args.device,
            host=args.host,
            build_folder=args.build_folder,
            output_folder=output_dir,
            logger=logger,
        )
        # step 0: check if name collision happens for context binaries
        logger.info(f"({index}/{num_bins}) checking custom op name of {ctx_bin}")
        custom_op_name = f"ctx_loader_{binary_name}"
        # bump a postfix for every repeated binary stem to keep op names unique
        postfix = name_map.get(custom_op_name, 0) + 1
        name_map[custom_op_name] = postfix
        if postfix > 1:
            custom_op_name = f"{custom_op_name}_{postfix}"
        # step 1: generate ExportedProgram with custom op as binary loader & lower to QnnBackend
        logger.info(f"({index}/{num_bins}) exporting program for {ctx_bin}")
        prog_info = from_context_binary(
            ctx_bin, custom_op_name, getattr(QcomChipset, args.model)
        )
        # step 2: write pte files and IO information
        logger.info(f"({index}/{num_bins}) exporting {binary_name}.pte")
        with open(f"{output_dir}/{binary_name}.pte", "wb") as f:
            prog_info["edge_program_manager"].to_executorch(
                config=ExecutorchBackendConfig(
                    memory_planning_pass=memory_planning_pass
                )
            ).write_to_file(f)

        logger.info(
            f"({index}/{num_bins}) exporting network graph with {binary_name}.svg"
        )
        draw_graph(binary_name, output_dir, prog_info["exported_program"].graph_module)
        logger.info(
            f"({index}/{num_bins}) exporting graph description with {binary_name}.json"
        )
        with open(f"{output_dir}/{binary_name}.json", "w") as f:
            graph_info = get_io_info(prog_info, ctx_bin, compiler_specs)
            graph_info["soc_model"] = args.model
            json.dump(graph_info, f, indent=2)


def execute(args):
    logger = get_logger()

    # load graph description file
    pte_name = Path(args.pte_directory).stem
    graph_desc = f"{args.pte_directory}/{pte_name}.json"
    logger.info(f"loading graph description: {graph_desc}")
    with open(graph_desc, "r") as f:
        graph_info = json.load(f)

    # load input files
    logger.info("loading user inputs")
    user_inputs = []
    # -i is optional; fall back to ones tensors when no input files are given
    for input_file in args.input_files or []:
        with open(input_file, "rb") as f:
            buffer = io.BytesIO(f.read())
            user_inputs.append(torch.load(buffer, weights_only=False))

    # check if inputs are valid, fall back to ones tensors if not
    logger.info("generating input data")
    inputs, input_list = get_tensor(graph_info["inputs"], user_inputs, logger)

    logger.info("preparing ADB connection")
    # leverage SimpleADB for e2e inference
    adb = SimpleADB(
        qnn_sdk=os.getenv("QNN_SDK_ROOT"),
        build_path=args.build_folder,
        pte_path=f"{args.pte_directory}/{pte_name}.pte",
        workspace=f"/data/local/tmp/executorch/{pte_name}",
        device_id=args.device,
        soc_model=graph_info["soc_model"],
        host_id=args.host,
        shared_buffer=args.shared_buffer,
    )

    logger.info("pushing QNN libraries & other artifacts")
    adb.push(inputs=inputs, input_list=input_list)

    logger.info("starting inference")
    adb.execute()

    logger.info("collecting output data")

    def post_process():
        output_info, outputs = graph_info["outputs"], []
        output_folder = f"{args.output_data_folder}/outputs"
        for i, f in enumerate(sorted(os.listdir(output_folder))):
            filename = os.path.join(output_folder, f)
            output = np.fromfile(
                filename, dtype=eval(f"np.{output_info[i]['dtype'].split('.')[-1]}")
            )
            outputs.append(torch.from_numpy(output.reshape(output_info[i]["shape"])))
            os.remove(filename)

        os.rmdir(output_folder)
        outputs, _ = get_tensor(output_info, outputs, logger, checking_output=True)
        # dataset length equals 1
        for i, output in enumerate(outputs[0]):
            torch.save(output, f"{args.output_data_folder}/{output_info[i]['name']}.pt")

    make_output_dir(args.output_data_folder)
    adb.pull(args.output_data_folder, post_process)
    logger.info(
        f"execution finished, please check {args.output_data_folder} for results"
    )


def main():
    parser = argparse.ArgumentParser(
        description=(
            "Utility to lower precompiled model libraries / "
            "context binaries from Qualcomm AI Engine Direct to ExecuTorch"
            " .pte programs. Please visit https://aihub.qualcomm.com/ to "
            "download your favorite models."
        ),
    )
    subparsers = parser.add_subparsers(
        title="subcommands",
        description=(
            "[compile]: Compile designated model libraries / "
            "context binaries into .pte files. "
            "[execute]: Perform on-device inference with given .pte."
        ),
    )

    sub_compile = subparsers.add_parser(
        name="compile",
        help=(
            "e.g. python export.py compile -a model.bin -m SM8650 "
            "-b /path/to/build-android"
        ),
    )
    sub_compile.add_argument(
        "-a",
        "--artifacts",
        nargs="+",
        type=str,
        required=True,
        help=(
            "Path to artifacts generated by AI Hub or QNN tools, "
            "batch processing is supported. "
            "e.g. python export.py compile -a a.bin b.so c.bin "
            "-m SM8650 -s $SERIAL_NO -b /path/to/build-android"
        ),
    )
    sub_compile.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="SoC model, e.g. SM8650",
    )
    sub_compile.add_argument(
        "-s",
        "--device",
        type=str,
        help="Serial number of the device, which can be obtained via 'adb devices'.",
    )
    sub_compile.add_argument(
        "-o",
        "--output_pte_folder",
        type=str,
        default="./output_pte",
        help=(
            "Path to output artifacts, stored in 'output_pte' if not given. "
            "Graph descriptions & diagrams will also be exported."
        ),
    )
    sub_compile.add_argument(
        "-b",
        "--build_folder",
        help="Path to the CMake binary directory for Android, e.g., /path/to/build-android",
        type=str,
        required=True,
    )
    sub_compile.add_argument(
        "-l",
        "--allocate_graph_io",
        # plain `type=bool` would treat the string "False" as truthy,
        # so parse the documented True/False values explicitly
        type=lambda x: x.lower() in ("true", "1"),
        default=True,
        help=(
            "True if IO tensors are pre-allocated by the framework. "
            "False for users who want to manage resources at runtime."
        ),
    )
    sub_compile.add_argument(
        "-H",
        "--host",
        type=str,
        help="Gateway hostname.",
    )
    sub_compile.set_defaults(callback=compile)

    sub_execute = subparsers.add_parser(
        name="execute",
        help=(
            "e.g. python export.py execute -p model_dir -i inp.raw "
            "-s device_serial"
        ),
    )
    sub_execute.add_argument(
        "-p",
        "--pte_directory",
        type=str,
        required=True,
        help="Path to the .pte file folder generated by the 'compile' subcommand.",
    )
    sub_execute.add_argument(
        "-i",
        "--input_files",
        nargs="*",
        type=str,
        help=(
            "Path to input files stored via torch.save. "
            "If the number / spec of input files doesn't match the given .pte file, "
            "tensors filled with value 1 will be taken as inputs."
        ),
    )
    sub_execute.add_argument(
        "-s",
        "--device",
        type=str,
        required=True,
        help="Serial number of the device, which can be obtained via 'adb devices'.",
    )
    sub_execute.add_argument(
        "-o",
        "--output_data_folder",
        type=str,
        default="./output_data",
        help="Path to output data, stored in 'output_data' if not given.",
    )
    sub_execute.add_argument(
        "-b",
        "--build_folder",
        help="Path to the CMake binary directory for Android, e.g., /path/to/build-android",
        type=str,
        required=True,
    )
    sub_execute.add_argument(
        "-z",
        "--shared_buffer",
        help=(
            "Enables usage of a shared buffer between the application and the backend "
            "for graph I/O. Please use with `--allocate_graph_io False` in the compile "
            "command."
        ),
        action="store_true",
    )
    sub_execute.add_argument(
        "-H",
        "--host",
        type=str,
        help="Gateway hostname.",
    )
    sub_execute.set_defaults(callback=execute)

    args = parser.parse_args()
    args.callback(args)


if __name__ == "__main__":
    main()