# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load(":targets.bzl", "define_common_targets")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

# Tag this build file with the "executorch" oncall.
# NOTE(review): presumably this is ownership metadata only — confirm.
oncall("executorch")

# Instantiate the targets declared in the sibling targets.bzl (the ones shared
# between fbcode and xplat). Everything below this call is fbcode-only.
define_common_targets()

# Python library packaging the Llama transformer sources (llama_transformer.py
# and rope.py) under the executorch.examples.models.llama base module.
# Its only declared dependency is torch.
runtime.python_library(
    name = "llama_transformer",
    srcs = [
        "llama_transformer.py",
        "rope.py",
    ],
    # NOTE(review): semantics of this flag are defined by the runtime wrapper,
    # not visible here — every python_library in this file sets it.
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    # Visible to all of //executorch plus the EXECUTORCH_CLIENTS group.
    visibility = [
        "//executorch/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "//caffe2:torch",
    ],
)

# Python library for the Llama model package (__init__.py, fairseq2.py,
# model.py) under the executorch.examples.models.llama base module.
# Bundles the params target as a resource and depends on :llama_transformer.
runtime.python_library(
    name = "llama2_model",
    srcs = [
        "__init__.py",
        "fairseq2.py",
        "model.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    # Maps the params target to the name "params" inside the packaged library.
    resources = {
        "//executorch/examples/models/llama/params:params": "params",
    },
    # Unlike most libraries in this file, this one is not exposed to
    # @EXECUTORCH_CLIENTS; only bento, bento_kernels and executorch see it.
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/...",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/examples/models:model_base",
        "//executorch/examples/models/llama:llama_transformer",
        "//executorch/examples/models:checkpoint",
    ],
)

# Executable whose entry point is export_llama.main. The Python code itself
# comes from :export_library; this target adds the ATen pybindings, which
# :export_library deliberately leaves for its users to choose.
runtime.python_binary(
    name = "export_llama",
    main_function = "executorch.examples.models.llama.export_llama.main",
    # visibility = ["//executorch/examples/..."],
    # Native libraries preloaded for the binary.
    # NOTE(review): presumably needed so the custom/quantized ops are
    # registered before export runs; list order may matter — confirm before
    # reordering.
    preload_deps = [
        "//executorch/extension/llm/custom_ops:model_sharding_py",
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
        "//executorch/kernels/quantized:aot_lib",
    ],
    deps = [
        ":export_library",
        "//caffe2:torch",
        "//executorch/extension/pybindings:aten_lib",
    ],
)

# Alias that runs :export_llama with LD_LIBRARY_PATH pointing at the Qualcomm
# QNN offline-compile libraries. The QNN version in the target path is filled
# in from get_qnn_library_verision().
# NOTE: "verision" is the spelling of the symbol exported by qnn_version.bzl
# (see the load() at the top of this file); do not correct it only here.
runtime.command_alias(
    name = "export_llama_qnn",
    env = {
        "LD_LIBRARY_PATH": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:qnn_offline_compile_libs)".format(get_qnn_library_verision()),
    },
    exe = ":export_llama",
)

# Python library containing the Llama export entry points (export_llama.py,
# export_llama_lib.py), model.py, and the source_transformation/ modules,
# under the executorch.examples.models.llama base module.
# Note: model.py is also listed in :llama2_model, and
# source_transformation/{quantized_kv_cache,sdpa}.py are also packaged by the
# standalone :quantized_kv_cache and :sdpa targets below.
runtime.python_library(
    name = "export_library",
    srcs = [
        "export_llama.py",
        "export_llama_lib.py",
        "model.py",
        "source_transformation/apply_spin_quant_r1_r2.py",
        "source_transformation/attention.py",
        "source_transformation/lora.py",
        "source_transformation/pre_quantization.py",
        "source_transformation/prune_vocab.py",
        "source_transformation/quantize.py",
        "source_transformation/quantized_kv_cache.py",
        "source_transformation/rms_norm.py",
        "source_transformation/rope.py",
        "source_transformation/sdpa.py",
        "source_transformation/spin_quant.py",
        "source_transformation/vulkan_rope.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/examples/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
        "//caffe2:torch",
        "//executorch/backends/vulkan/_passes:vulkan_passes",
        "//executorch/examples/models:model_base",
        "//executorch/examples/models:models",
        "//executorch/extension/llm/custom_ops:custom_ops_aot_py",
        "//executorch/extension/llm/export:export_lib",
        # Exactly one of the pybindings targets below must be added by the
        # user of this library, depending on which runtime library the client
        # wants to use. They are intentionally NOT dependencies here
        # (e.g. :export_llama adds aten_lib, :eval_library adds portable_lib).
        # "//executorch/extension/pybindings:aten_lib",
        # "//executorch/extension/pybindings:portable_lib",
        # "//executorch/extension/pybindings:portable_lib_plus_custom",
        "//executorch/devtools/etrecord:etrecord",
        "//executorch/util:memory_profiler",
        "//executorch/util:python_profiler",
        "fbsource//third-party/pypi/coremltools:coremltools",
        "fbsource//third-party/pypi/sentencepiece:sentencepiece",
        "//pytorch/ao:torchao",
    ],
)

# Executable whose entry point is eval_llama.main; the implementation lives
# in :eval_library. Unlike :export_llama, no preload_deps are declared here.
runtime.python_binary(
    name = "eval_llama",
    main_function = "executorch.examples.models.llama.eval_llama.main",
    deps = [
        ":eval_library",
        "//caffe2:torch",
    ],
)

# Python library containing the Llama evaluation code (eval_llama.py,
# eval_llama_lib.py, evaluate/eager_eval.py) under the
# executorch.examples.models.llama base module. Builds on :export_library and
# selects the portable pybindings variant.
runtime.python_library(
    name = "eval_library",
    srcs = [
        "eval_llama.py",
        "eval_llama_lib.py",
        "evaluate/eager_eval.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/examples/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "fbsource//third-party/pypi/lm-eval:lm-eval",
        "fbsource//third-party/pypi/tiktoken:tiktoken",
        ":export_library",
        "//executorch/examples/models/llama/tokenizer:tiktoken_py",
        "//executorch/extension/llm/export:export_lib",
        "//executorch/extension/llm/tokenizer:tokenizer_py_lib",
        # The pybindings variant that :export_library leaves to its users.
        "//executorch/extension/pybindings:portable_lib",
    ],
)

# Standalone library for source_transformation/quantized_kv_cache.py with
# torch as its only dependency; used by the tests at the end of this file.
# NOTE(review): unlike the other python_library targets in this file, no
# base_module is set here, so the import path falls back to the rule's
# default — confirm this is intentional.
runtime.python_library(
    name = "quantized_kv_cache",
    srcs = [
        "source_transformation/quantized_kv_cache.py",
    ],
    _is_external_target = True,
    visibility = ["//executorch/..."],
    deps = [
        "//caffe2:torch",
    ],
)

# Standalone library for source_transformation/sdpa.py with torch as its only
# dependency; used by :quantized_sdpa_with_kv_cache_test.
# NOTE(review): like :quantized_kv_cache, no base_module is set here, unlike
# the other python_library targets in this file — confirm this is intentional.
runtime.python_library(
    name = "sdpa",
    srcs = [
        "source_transformation/sdpa.py",
    ],
    _is_external_target = True,
    visibility = ["//executorch/..."],
    deps = [
        "//caffe2:torch",
    ],
)

# Test target running source_transformation/test_quantized_kv_cache.py
# against :quantized_kv_cache and :llama_transformer.
runtime.python_test(
    name = "quantized_kv_cache_test",
    srcs = [
        "source_transformation/test_quantized_kv_cache.py",
    ],
    # NOTE(review): presumably preloaded so the custom ops are available to
    # the code under test — confirm.
    preload_deps = [
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
    ],
    deps = [
        ":quantized_kv_cache",
        "//caffe2:torch",
        "//executorch/examples/models/llama:llama_transformer",
    ],
)

# Test target running
# source_transformation/test_sdpa_with_quantized_kv_cache.py against
# :quantized_kv_cache, :sdpa and :llama_transformer.
runtime.python_test(
    name = "quantized_sdpa_with_kv_cache_test",
    srcs = [
        "source_transformation/test_sdpa_with_quantized_kv_cache.py",
    ],
    # NOTE(review): presumably preloaded so the custom ops are available to
    # the code under test — confirm.
    preload_deps = [
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
    ],
    deps = [
        ":quantized_kv_cache",
        ":sdpa",
        "//caffe2:torch",
        "//executorch/examples/models/llama:llama_transformer",
    ],
)