# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse

from executorch.extension.gguf_util.converter import convert_to_pte
from executorch.extension.gguf_util.load_gguf import load_file


def save_pte_program(_, pte_file) -> None:
    # TODO (mnachin): Save the PTE program to a file
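    # A minimal sketch of what the save might look like (an assumption, not
    # the confirmed API: it presumes the first argument, currently unused,
    # carries its serialized bytes in a `.buffer` attribute, as
    # ExecutorchProgramManager does; convert_to_pte's return type is not
    # confirmed here):
    #
    #     with open(pte_file, "wb") as f:
    #         f.write(pte_program.buffer)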
    print(f"Saving PTE program to {pte_file}")


def main() -> None:
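    """Convert a GGUF checkpoint to an ExecuTorch PTE program.

    Example invocation (file names are placeholders, not files shipped
    with this script):
        python convert_main.py --gguf_file model.gguf --pte_file model.pte
    """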
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--gguf_file",
        type=str,
        required=True,
        help="The GGUF file to load.",
    )
    parser.add_argument(
        "--pte_file",
        type=str,
        required=True,
        help="The path to save the PTE file.",
    )
    args = parser.parse_args()

    # Step 1: Load the GGUF file
    gguf_model_args, gguf_weights = load_file(args.gguf_file)

    # Step 2: Convert the GGUF model to PTE
    # Currently, under the hood, this first converts the GGUF model to a
    # PyTorch model (nn.Module) and then exports it to ExecuTorch.
    #
    # NOTE: In the future, it may make sense to refactor the GGUF-to-nn.Module
    # conversion into its own package that can be shared between ExecuTorch
    # and PyTorch core. There will likely be a need to load a GGUF file
    # directly into PyTorch core and use torch.compile/AOTInductor to
    # accelerate on server, without ever touching ExecuTorch.
    #
    # TODO(mnachin): Add a knob to delegate to various backends.
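    # A hypothetical shape for that knob (the flag name, backend choices, and
    # `backend=` parameter below are assumptions for illustration, not an
    # existing API):
    #
    #     parser.add_argument("--backend", choices=["xnnpack", "coreml"])
    #     pte_program = convert_to_pte(
    #         gguf_model_args, gguf_weights, backend=args.backend
    #     )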
    pte_program = convert_to_pte(gguf_model_args, gguf_weights)

    # Step 3: Save the PTE program so that it can be used by the
    # ExecuTorch runtime.
    save_pte_program(pte_program, args.pte_file)


if __name__ == "__main__":
    main()