# xref: /aosp_15_r20/external/executorch/examples/models/llama/TARGETS (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

4load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
5load(":targets.bzl", "define_common_targets")
6load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")
7
8oncall("executorch")
9
10define_common_targets()
11
12runtime.python_library(
13    name = "llama_transformer",
14    srcs = [
15        "llama_transformer.py",
16        "rope.py",
17    ],
18    _is_external_target = True,
19    base_module = "executorch.examples.models.llama",
20    visibility = [
21        "//executorch/...",
22        "@EXECUTORCH_CLIENTS",
23    ],
24    deps = [
25        "//caffe2:torch",
26    ],
27)
28
29runtime.python_library(
30    name = "llama2_model",
31    srcs = [
32        "__init__.py",
33        "fairseq2.py",
34        "model.py",
35    ],
36    _is_external_target = True,
37    base_module = "executorch.examples.models.llama",
38    resources = {
39        "//executorch/examples/models/llama/params:params": "params",
40    },
41    visibility = [
42        "//bento/...",
43        "//bento_kernels/...",
44        "//executorch/...",
45    ],
46    deps = [
47        "//caffe2:torch",
48        "//executorch/examples/models:model_base",
49        "//executorch/examples/models/llama:llama_transformer",
50        "//executorch/examples/models:checkpoint",
51    ],
52)
53
54runtime.python_binary(
55    name = "export_llama",
56    main_function = "executorch.examples.models.llama.export_llama.main",
57    # visibility = ["//executorch/examples/..."],
58    preload_deps = [
59        "//executorch/extension/llm/custom_ops:model_sharding_py",
60        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
61        "//executorch/kernels/quantized:aot_lib",
62    ],
63    deps = [
64        ":export_library",
65        "//caffe2:torch",
66        "//executorch/extension/pybindings:aten_lib",
67    ],
68)
69
70runtime.command_alias(
71    name = "export_llama_qnn",
72    env = {
73        "LD_LIBRARY_PATH": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:qnn_offline_compile_libs)".format(get_qnn_library_verision()),
74    },
75    exe = ":export_llama",
76)
77
78runtime.python_library(
79    name = "export_library",
80    srcs = [
81        "export_llama.py",
82        "export_llama_lib.py",
83        "model.py",
84        "source_transformation/apply_spin_quant_r1_r2.py",
85        "source_transformation/attention.py",
86        "source_transformation/lora.py",
87        "source_transformation/pre_quantization.py",
88        "source_transformation/prune_vocab.py",
89        "source_transformation/quantize.py",
90        "source_transformation/quantized_kv_cache.py",
91        "source_transformation/rms_norm.py",
92        "source_transformation/rope.py",
93        "source_transformation/sdpa.py",
94        "source_transformation/spin_quant.py",
95        "source_transformation/vulkan_rope.py",
96    ],
97    _is_external_target = True,
98    base_module = "executorch.examples.models.llama",
99    visibility = [
100        "//bento/...",
101        "//bento_kernels/...",
102        "//executorch/examples/...",
103        "@EXECUTORCH_CLIENTS",
104    ],
105    deps = [
106        "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
107        "//caffe2:torch",
108        "//executorch/backends/vulkan/_passes:vulkan_passes",
109        "//executorch/examples/models:model_base",
110        "//executorch/examples/models:models",
111        "//executorch/extension/llm/custom_ops:custom_ops_aot_py",
112        "//executorch/extension/llm/export:export_lib",
113        # one definition has to be included in the user of the libarary
114        # depending on what library the client wants to use
115        # "//executorch/extension/pybindings:aten_lib",
116        # "//executorch/extension/pybindings:portable_lib",
117        # "//executorch/extension/pybindings:portable_lib_plus_custom",
118        "//executorch/devtools/etrecord:etrecord",
119        "//executorch/util:memory_profiler",
120        "//executorch/util:python_profiler",
121        "fbsource//third-party/pypi/coremltools:coremltools",
122        "fbsource//third-party/pypi/sentencepiece:sentencepiece",
123        "//pytorch/ao:torchao",
124    ],
125)
126
127runtime.python_binary(
128    name = "eval_llama",
129    main_function = "executorch.examples.models.llama.eval_llama.main",
130    deps = [
131        ":eval_library",
132        "//caffe2:torch",
133    ],
134)
135
136runtime.python_library(
137    name = "eval_library",
138    srcs = [
139        "eval_llama.py",
140        "eval_llama_lib.py",
141        "evaluate/eager_eval.py",
142    ],
143    _is_external_target = True,
144    base_module = "executorch.examples.models.llama",
145    visibility = [
146        "//bento/...",
147        "//bento_kernels/...",
148        "//executorch/examples/...",
149        "@EXECUTORCH_CLIENTS",
150    ],
151    deps = [
152        "fbsource//third-party/pypi/lm-eval:lm-eval",
153        "fbsource//third-party/pypi/tiktoken:tiktoken",
154        ":export_library",
155        "//executorch/examples/models/llama/tokenizer:tiktoken_py",
156        "//executorch/extension/llm/export:export_lib",
157        "//executorch/extension/llm/tokenizer:tokenizer_py_lib",
158        "//executorch/extension/pybindings:portable_lib",
159    ],
160)
161
162runtime.python_library(
163    name = "quantized_kv_cache",
164    srcs = [
165        "source_transformation/quantized_kv_cache.py",
166    ],
167    _is_external_target = True,
168    visibility = ["//executorch/..."],
169    deps = [
170        "//caffe2:torch",
171    ],
172)
173
174runtime.python_library(
175    name = "sdpa",
176    srcs = [
177        "source_transformation/sdpa.py",
178    ],
179    _is_external_target = True,
180    visibility = ["//executorch/..."],
181    deps = [
182        "//caffe2:torch",
183    ],
184)
185
186runtime.python_test(
187    name = "quantized_kv_cache_test",
188    srcs = [
189        "source_transformation/test_quantized_kv_cache.py",
190    ],
191    preload_deps = [
192        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
193    ],
194    deps = [
195        ":quantized_kv_cache",
196        "//caffe2:torch",
197        "//executorch/examples/models/llama:llama_transformer",
198    ],
199)
200
201runtime.python_test(
202    name = "quantized_sdpa_with_kv_cache_test",
203    srcs = [
204        "source_transformation/test_sdpa_with_quantized_kv_cache.py",
205    ],
206    preload_deps = [
207        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
208    ],
209    deps = [
210        ":quantized_kv_cache",
211        ":sdpa",
212        "//caffe2:torch",
213        "//executorch/examples/models/llama:llama_transformer",
214    ],
215)
216