#!/usr/bin/env python
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import sys

kUsage = '''Usage: truncate_net_log.py INPUT_FILE OUTPUT_FILE TRUNCATED_SIZE

Creates a smaller version of INPUT_FILE (which is a chrome-net-export-log.json
formatted NetLog file) and saves it to OUTPUT_FILE. Note that this works by
reading the file line by line and not fully parsing the JSON, so it must match
the exact format (whitespace and all).

File truncation is done by dropping the oldest events and keeping everything
else.

Parameters:

  INPUT_FILE:
    Path to net-export JSON file

  OUTPUT_FILE:
    Path to save truncated file to

  TRUNCATED_SIZE:
    The desired (approximate) size for the truncated file. May use a suffix to
    indicate units. Examples:
      2003 --> 2003 bytes
      100K --> 100 KiB
      8M --> 8 MiB
      1.5m --> 1.5 MiB
'''

# Markers used to recognise the structure of the log without parsing the JSON.
# They are bytes because the files are processed in binary mode, so that line
# lengths are measured in the same unit (bytes) as the file sizes.
_EVENTS_START = b'"events": ['
_POLLED_DATA_START = b'"polledData": {'
_EVENTS_END = b'],'

# A decimal number, optionally followed by a K/M unit (also spelled KB/KiB,
# MB/MiB) or a plain B. Anchored at the end so that unknown units such as
# "100G" are rejected rather than silently read as 100 bytes.
_FILESIZE_RE = re.compile(r'([0-9.]+)(?:([km])(?:i?b)?|b)?$')

_SUFFIX_MULTIPLIERS = {
    '': 1,
    'k': 1024,
    'm': 1024 * 1024,
}


def get_file_size(path):
    '''Returns the filesize of |path| in bytes'''
    return os.stat(path).st_size


def _closes_events_array(line):
    '''Returns True if |line| (bytes) marks the end of the events array.

    The final event line closes the array and hence ends in "],". The check
    for the polledData line that follows it is a fallback, and mostly there
    for documentation sake.'''
    return (line.startswith(_POLLED_DATA_START) or
            line.rstrip(b'\r\n').endswith(_EVENTS_END))


def truncate_log_file(in_path, out_path, desired_size):
    '''Copies |in_path| to |out_path| such that it is approximately
    |desired_size| bytes large. This is accomplished by dropping the oldest
    events first. The final file size may not be exactly |desired_size| as
    only complete event lines are skipped, and the line that closes the events
    array is always kept so that the output remains well-formed.

    If |desired_size| is not smaller than the input file, the file is copied
    unchanged. A summary line is written to stdout when done.'''
    orig_size = get_file_size(in_path)
    bytes_to_truncate = orig_size - desired_size

    # This variable is True if the current line being processed is an Event
    # line.
    inside_events = False

    # Binary mode: sizes are counted in bytes, and the content is copied
    # verbatim regardless of the locale's default text encoding.
    with open(in_path, 'rb') as in_file, open(out_path, 'wb') as out_file:
        for line in in_file:
            if inside_events and _closes_events_array(line):
                inside_events = False

            # If this is an event line and need to drop more bytes, go ahead
            # and skip the line. Otherwise copy it to the output file.
            if inside_events and bytes_to_truncate > 0:
                bytes_to_truncate -= len(line)
            else:
                out_file.write(line)

            # All lines after this are events (up until the closing square
            # bracket).
            if line.startswith(_EVENTS_START):
                inside_events = True

    sys.stdout.write(
        'Truncated file from %d to %d bytes\n' % (orig_size,
                                                  get_file_size(out_path)))


def parse_filesize_str(filesize_str):
    '''Parses a string representation of a file size into a byte value, or
    None on failure.

    Accepts a non-negative decimal number optionally followed by a
    case-insensitive unit: K (KiB) or M (MiB), e.g. "2003", "100K", "1.5m".
    The spellings "KB"/"KiB"/"MB"/"MiB"/"B" are tolerated as well. Anything
    else (unknown units, trailing garbage, malformed numbers) yields None.'''
    m = _FILESIZE_RE.match(filesize_str.strip().lower())
    if not m:
        return None

    # The regex above accepts some invalid decimals too (e.g. "1.2.3" or "."),
    # so let float() have the final say.
    try:
        float_value = float(m.group(1))
    except ValueError:
        return None

    # group(2) is None when no K/M unit was given.
    suffix = m.group(2) or ''
    return int(float_value * _SUFFIX_MULTIPLIERS[suffix])


def main():
    '''Command line entry point: validates the arguments in sys.argv and runs
    the truncation. Exits with status 1 (after printing the usage text to
    stderr) on invalid arguments.'''
    if len(sys.argv) != 4:
        sys.stderr.write('ERROR: Requires 3 command line arguments\n')
        sys.stderr.write(kUsage)
        sys.exit(1)

    in_path = os.path.normpath(sys.argv[1])
    out_path = os.path.normpath(sys.argv[2])

    # The output is opened for writing while the input is still being read, so
    # they must not be the same file. Compare resolved paths so that different
    # spellings of one file (relative vs. absolute, symlinks) are caught too.
    if os.path.realpath(in_path) == os.path.realpath(out_path):
        sys.stderr.write(
            'ERROR: OUTPUT_FILE must be different from INPUT_FILE\n')
        sys.stderr.write(kUsage)
        sys.exit(1)

    size_str = sys.argv[3]
    size_bytes = parse_filesize_str(size_str)
    if size_bytes is None:
        sys.stderr.write(
            'ERROR: Could not parse TRUNCATED_SIZE: %s\n' % size_str)
        sys.stderr.write(kUsage)
        sys.exit(1)

    truncate_log_file(in_path, out_path, size_bytes)


if __name__ == '__main__':
    main()