#!/usr/bin/env python3
# Copyright 2018 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A tool for running diffing tools and measuring patch sizes.

For every regular file in the source corpus, the file with the same name in
the target corpus is diffed twice: once with `puffin` (puffdiff operation) and
once with `bsdiff`. The sizes of the inputs and of both resulting patches are
reported through the `logging` module at DEBUG level.
"""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
    """Puffin general processing error."""


def ParseArguments(argv):
    """Parses and Validates command line arguments.

    Args:
        argv: command line arguments to parse (without the program name).

    Returns:
        The parsed arguments namespace, with `src_corpus`, `tgt_corpus` and
        `debug` attributes.

    Raises:
        Error: if either corpus option is missing or does not name an
            existing directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--src-corpus",
        metavar="DIR",
        help="The source corpus directory with compressed files.",
    )
    parser.add_argument(
        "--tgt-corpus",
        metavar="DIR",
        help="The target corpus directory with compressed files.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Turns on verbosity."
    )

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Both options default to None when omitted, so a missing option is
    # reported through the same Error as a path that is not a directory.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error(
                "Corpus directory {} is non-existent or inaccessible".format(
                    corpus
                )
            )
    return args


def _ListFiles(directory):
    """Returns the sorted paths of the regular files directly in |directory|."""
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # processing (and therefore the logged output) order reproducible.
    paths = (os.path.join(directory, name) for name in os.listdir(directory))
    return sorted(path for path in paths if os.path.isfile(path))


def _RunOrRaise(cmd, error_template, operation):
    """Runs |cmd| and raises Error built from |error_template| on failure."""
    if subprocess.call(cmd) != 0:
        raise Error(error_template.format(operation, cmd))


def main(argv):
    """The main function.

    Args:
        argv: the full command line, including the program name at index 0.

    Returns:
        0 on success.

    Raises:
        Error: if the arguments are invalid, if a source file has no
            same-named file in the target corpus, or if one of the diffing
            tools exits with a non-zero status.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        # The measurements below are emitted with logging.debug(), so they
        # are only visible when the root logger is lowered to DEBUG.
        logging.getLogger().setLevel(logging.DEBUG)

    # Construct list of appropriate files.
    src_files = _ListFiles(args.src_corpus)
    tgt_files = _ListFiles(args.tgt_corpus)

    # Check if all files in src_files have a target file in tgt_files.
    src_names = set(map(os.path.basename, src_files))
    tgt_names = set(map(os.path.basename, tgt_files))
    files_mismatch = src_names - tgt_names
    if files_mismatch:
        raise Error(
            "Target files {} do not exist in corpus: {}".format(
                files_mismatch, args.tgt_corpus
            )
        )

    for src in src_files:
        # The temporary files only provide paths for the tools to write their
        # patches to; they are removed when the `with` block exits.
        with tempfile.NamedTemporaryFile() as puffdiff_patch, \
                tempfile.NamedTemporaryFile() as bsdiff_patch:
            tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

            # Running the puffdiff operation
            operation = "puffdiff"
            cmd = [
                "puffin",
                "--operation={}".format(operation),
                "--src_file={}".format(src),
                "--dst_file={}".format(tgt),
                "--patch_file={}".format(puffdiff_patch.name),
            ]
            _RunOrRaise(cmd, "Puffin failed to do {} command: {}", operation)

            # Running the bsdiff operation
            operation = "bsdiff"
            cmd = ["bsdiff", "--type", "bz2", src, tgt, bsdiff_patch.name]
            _RunOrRaise(cmd, "Failed to do {} command: {}", operation)

            # Sizes are read by path (not through the open handles) because
            # the patches are written by the external processes.
            logging.debug(
                "%s(%d -> %d) : bsdiff(%d), puffdiff(%d)",
                os.path.basename(src),
                os.path.getsize(src),
                os.path.getsize(tgt),
                os.path.getsize(bsdiff_patch.name),
                os.path.getsize(puffdiff_patch.name),
            )

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))