xref: /aosp_15_r20/external/puffin/scripts/measure_patch_size.py (revision 07fb1d065b7cfb4729786fadd42a612532d2f466)
1#!/usr/bin/env python3
2# Copyright 2018 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A tool for running diffing tools and measuring patch sizes."""
7
8import argparse
9import logging
10import os
11import subprocess
12import sys
13import tempfile
14
15
class Error(Exception):
    """Base exception for failures reported by this script."""
18
19
def ParseArguments(argv):
    """Parses and validates command line arguments.

    Args:
        argv: command line arguments to parse (without the program name).

    Returns:
        The argparse.Namespace holding `src_corpus`, `tgt_corpus` and
        `debug`.

    Raises:
        Error: if either corpus option is missing or does not name an
            existing directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--src-corpus",
        metavar="DIR",
        help="The source corpus directory with compressed files.",
    )
    parser.add_argument(
        "--tgt-corpus",
        metavar="DIR",
        help="The target corpus directory with compressed files.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Turns on verbosity."
    )

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Both corpora are mandatory; an omitted option parses as None and is
    # rejected here together with paths that are not directories.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error(
                "Corpus directory {} is non-existent or inaccessible".format(
                    corpus
                )
            )
    return args
56
57
def main(argv):
    """Runs puffdiff and bsdiff over a corpus and logs the patch sizes.

    For every regular file directly inside the source corpus, the file with
    the same name in the target corpus is used as the diff target. Both the
    `puffin` (puffdiff operation) and `bsdiff` commands are run, each writing
    its patch to a temporary file, and the resulting sizes are logged at
    DEBUG level.

    Args:
        argv: the full command line, including the program name.

    Returns:
        0 on success.

    Raises:
        Error: if a source file has no counterpart in the target corpus, or
            if either diffing command exits with a non-zero status.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Gather the regular files (as full paths) found directly in each corpus.
    src_files = [
        path
        for path in [
            os.path.join(args.src_corpus, name)
            for name in os.listdir(args.src_corpus)
        ]
        if os.path.isfile(path)
    ]
    tgt_files = [
        path
        for path in [
            os.path.join(args.tgt_corpus, name)
            for name in os.listdir(args.tgt_corpus)
        ]
        if os.path.isfile(path)
    ]

    # Every source file must have a same-named file in the target corpus.
    src_names = set(os.path.basename(path) for path in src_files)
    tgt_names = set(os.path.basename(path) for path in tgt_files)
    files_mismatch = src_names - tgt_names
    if files_mismatch:
        raise Error(
            "Target files {} do not exist in corpus: {}".format(
                files_mismatch, args.tgt_corpus
            )
        )

    for src in src_files:
        # The temporary files only provide names for the tools to write
        # their patches to; they are removed when the blocks exit.
        with tempfile.NamedTemporaryFile() as puffdiff_patch:
            with tempfile.NamedTemporaryFile() as bsdiff_patch:
                tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

                # Running the puffdiff operation
                operation = "puffdiff"
                puffdiff_cmd = [
                    "puffin",
                    "--operation={}".format(operation),
                    "--src_file={}".format(src),
                    "--dst_file={}".format(tgt),
                    "--patch_file={}".format(puffdiff_patch.name),
                ]
                status = subprocess.call(puffdiff_cmd)
                if status != 0:
                    raise Error(
                        "Puffin failed to do {} command: {}".format(
                            operation, puffdiff_cmd
                        )
                    )

                # Running the bsdiff operation
                operation = "bsdiff"
                bsdiff_cmd = [
                    "bsdiff",
                    "--type",
                    "bz2",
                    src,
                    tgt,
                    bsdiff_patch.name,
                ]
                status = subprocess.call(bsdiff_cmd)
                if status != 0:
                    raise Error(
                        "Failed to do {} command: {}".format(
                            operation, bsdiff_cmd
                        )
                    )

                # Sizes are read in the same order they appear in the log
                # line: source, target, bsdiff patch, puffdiff patch.
                src_size = os.stat(src).st_size
                tgt_size = os.stat(tgt).st_size
                bsdiff_size = os.stat(bsdiff_patch.name).st_size
                puffdiff_size = os.stat(puffdiff_patch.name).st_size
                logging.debug(
                    "%s(%d -> %d) : bsdiff(%d), puffdiff(%d)",
                    os.path.basename(src),
                    src_size,
                    tgt_size,
                    bsdiff_size,
                    puffdiff_size,
                )

    return 0
132
133
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
136