# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
load(":targets.bzl", "define_common_targets")
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")

oncall("executorch")

define_common_targets()

runtime.python_library(
    name = "llama_transformer",
    srcs = [
        "llama_transformer.py",
        "rope.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    visibility = [
        "//executorch/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "//caffe2:torch",
    ],
)

runtime.python_library(
    name = "llama2_model",
    srcs = [
        "__init__.py",
        "fairseq2.py",
        "model.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    resources = {
        "//executorch/examples/models/llama/params:params": "params",
    },
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/...",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/examples/models:model_base",
        "//executorch/examples/models/llama:llama_transformer",
        "//executorch/examples/models:checkpoint",
    ],
)

runtime.python_binary(
    name = "export_llama",
    main_function = "executorch.examples.models.llama.export_llama.main",
    # visibility = ["//executorch/examples/..."],
    preload_deps = [
        "//executorch/extension/llm/custom_ops:model_sharding_py",
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
        "//executorch/kernels/quantized:aot_lib",
    ],
    deps = [
        ":export_library",
        "//caffe2:torch",
        "//executorch/extension/pybindings:aten_lib",
    ],
)

runtime.command_alias(
    name = "export_llama_qnn",
    env = {
        "LD_LIBRARY_PATH": "$(location fbsource//third-party/qualcomm/qnn/qnn-{0}:qnn_offline_compile_libs)".format(get_qnn_library_verision()),
    },
    exe = ":export_llama",
)

runtime.python_library(
    name = "export_library",
    srcs = [
        "export_llama.py",
        "export_llama_lib.py",
        "model.py",
        "source_transformation/apply_spin_quant_r1_r2.py",
        "source_transformation/attention.py",
        "source_transformation/lora.py",
        "source_transformation/pre_quantization.py",
        "source_transformation/prune_vocab.py",
        "source_transformation/quantize.py",
        "source_transformation/quantized_kv_cache.py",
        "source_transformation/rms_norm.py",
        "source_transformation/rope.py",
        "source_transformation/sdpa.py",
        "source_transformation/spin_quant.py",
        "source_transformation/vulkan_rope.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/examples/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
        "//caffe2:torch",
        "//executorch/backends/vulkan/_passes:vulkan_passes",
        "//executorch/examples/models:model_base",
        "//executorch/examples/models:models",
        "//executorch/extension/llm/custom_ops:custom_ops_aot_py",
        "//executorch/extension/llm/export:export_lib",
        # Exactly one of the pybindings libraries below has to be added by the
        # user of this library, depending on which runtime the client wants to
        # use (see the commented client sketch at the end of this file).
        # "//executorch/extension/pybindings:aten_lib",
        # "//executorch/extension/pybindings:portable_lib",
        # "//executorch/extension/pybindings:portable_lib_plus_custom",
        "//executorch/devtools/etrecord:etrecord",
        "//executorch/util:memory_profiler",
        "//executorch/util:python_profiler",
        "fbsource//third-party/pypi/coremltools:coremltools",
        "fbsource//third-party/pypi/sentencepiece:sentencepiece",
        "//pytorch/ao:torchao",
    ],
)

runtime.python_binary(
    name = "eval_llama",
    main_function = "executorch.examples.models.llama.eval_llama.main",
    deps = [
        ":eval_library",
        "//caffe2:torch",
    ],
)

runtime.python_library(
    name = "eval_library",
    srcs = [
        "eval_llama.py",
        "eval_llama_lib.py",
        "evaluate/eager_eval.py",
    ],
    _is_external_target = True,
    base_module = "executorch.examples.models.llama",
    visibility = [
        "//bento/...",
        "//bento_kernels/...",
        "//executorch/examples/...",
        "@EXECUTORCH_CLIENTS",
    ],
    deps = [
        "fbsource//third-party/pypi/lm-eval:lm-eval",
        "fbsource//third-party/pypi/tiktoken:tiktoken",
        ":export_library",
        "//executorch/examples/models/llama/tokenizer:tiktoken_py",
        "//executorch/extension/llm/export:export_lib",
        "//executorch/extension/llm/tokenizer:tokenizer_py_lib",
        "//executorch/extension/pybindings:portable_lib",
    ],
)

runtime.python_library(
    name = "quantized_kv_cache",
    srcs = [
        "source_transformation/quantized_kv_cache.py",
    ],
    _is_external_target = True,
    visibility = ["//executorch/..."],
    deps = [
        "//caffe2:torch",
    ],
)

runtime.python_library(
    name = "sdpa",
    srcs = [
        "source_transformation/sdpa.py",
    ],
    _is_external_target = True,
    visibility = ["//executorch/..."],
    deps = [
        "//caffe2:torch",
    ],
)

runtime.python_test(
    name = "quantized_kv_cache_test",
    srcs = [
        "source_transformation/test_quantized_kv_cache.py",
    ],
    preload_deps = [
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
    ],
    deps = [
        ":quantized_kv_cache",
        "//caffe2:torch",
        "//executorch/examples/models/llama:llama_transformer",
    ],
)

runtime.python_test(
    name = "quantized_sdpa_with_kv_cache_test",
    srcs = [
        "source_transformation/test_sdpa_with_quantized_kv_cache.py",
    ],
    preload_deps = [
        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
    ],
    deps = [
        ":quantized_kv_cache",
        ":sdpa",
        "//caffe2:torch",
        "//executorch/examples/models/llama:llama_transformer",
    ],
)