#!/usr/bin/env python3
# Copyright 2018 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A tool for running diffing tools and measuring patch sizes."""

import argparse
import logging
import os
import subprocess
import sys
import tempfile


class Error(Exception):
    """Puffin general processing error."""


def ParseArguments(argv):
    """Parses and validates command line arguments.

    Args:
        argv: command line arguments to parse (without the program name).

    Returns:
        The parsed arguments namespace with `src_corpus`, `tgt_corpus` and
        `debug` attributes.

    Raises:
        Error: if either corpus directory is missing from the command line
            or is not an existing directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--src-corpus",
        metavar="DIR",
        help="The source corpus directory with compressed files.",
    )
    parser.add_argument(
        "--tgt-corpus",
        metavar="DIR",
        help="The target corpus directory with compressed files.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Turns on verbosity."
    )

    # Parse command-line arguments.
    args = parser.parse_args(argv)

    # Both corpora are mandatory; an omitted flag shows up here as None.
    for corpus in (args.src_corpus, args.tgt_corpus):
        if not corpus or not os.path.isdir(corpus):
            raise Error(
                "Corpus directory {} is non-existent or inaccesible".format(
                    corpus
                )
            )
    return args


def _ListFiles(corpus):
    """Returns the sorted paths of the regular files directly in |corpus|.

    Args:
        corpus: path of the directory to list.

    Returns:
        A sorted list of `corpus`-prefixed paths, one per directory entry for
        which os.path.isfile() is true. Sub-directories are skipped and not
        descended into.
    """
    paths = (os.path.join(corpus, name) for name in os.listdir(corpus))
    # os.listdir() returns entries in arbitrary order; sort so that the
    # processing (and logging) order is reproducible between runs.
    return sorted(path for path in paths if os.path.isfile(path))


def main(argv):
    """The main function.

    For every file in the source corpus, generates a puffdiff patch and a
    bsdiff patch against the same-named file in the target corpus and logs
    the source, target and patch sizes at DEBUG level.

    Args:
        argv: full command line, including the program name at index 0.

    Returns:
        0 on success.

    Raises:
        Error: if the arguments are invalid, a source file has no same-named
            file in the target corpus, or one of the diffing tools exits with
            a non-zero status.
    """
    args = ParseArguments(argv[1:])

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Construct list of appropriate files.
    src_files = _ListFiles(args.src_corpus)
    tgt_files = _ListFiles(args.tgt_corpus)

    # Check if all files in src_files have a target file in tgt_files.
    files_mismatch = {os.path.basename(path) for path in src_files} - {
        os.path.basename(path) for path in tgt_files
    }
    if files_mismatch:
        raise Error(
            "Target files {} do not exist in corpus: {}".format(
                files_mismatch, args.tgt_corpus
            )
        )

    for src in src_files:
        # The patches are only needed for their sizes, so they are written to
        # temporary files that are removed when the `with` block exits.
        with tempfile.NamedTemporaryFile() as puffdiff_patch, \
                tempfile.NamedTemporaryFile() as bsdiff_patch:
            tgt = os.path.join(args.tgt_corpus, os.path.basename(src))

            operation = "puffdiff"
            cmd = [
                "puffin",
                "--operation={}".format(operation),
                "--src_file={}".format(src),
                "--dst_file={}".format(tgt),
                "--patch_file={}".format(puffdiff_patch.name),
            ]
            # Running the puffdiff operation
            if subprocess.call(cmd) != 0:
                raise Error(
                    "Puffin failed to do {} command: {}".format(operation, cmd)
                )

            operation = "bsdiff"
            cmd = ["bsdiff", "--type", "bz2", src, tgt, bsdiff_patch.name]
            # Running the bsdiff operation
            if subprocess.call(cmd) != 0:
                raise Error(
                    "Failed to do {} command: {}".format(operation, cmd)
                )

            # The tools write to the patch paths themselves, so read the
            # sizes from the filesystem rather than from the file objects.
            logging.debug(
                "%s(%d -> %d) : bsdiff(%d), puffdiff(%d)",
                os.path.basename(src),
                os.stat(src).st_size,
                os.stat(tgt).st_size,
                os.stat(bsdiff_patch.name).st_size,
                os.stat(puffdiff_patch.name).st_size,
            )

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))