#!/usr/bin/env python
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Merge multiple CSV files, possibly with different columns.

The output header is either the one given with --header or the union of the
headers of all input files, in first-seen order. Without --key_field the rows
of the inputs are concatenated in command-line order; with --key_field the
(already sorted) inputs are merged lazily so the output stays sorted by that
field. Fields use ',' as the delimiter and '|' as the quote character.
"""

import argparse
import contextlib
import csv
import io
import heapq
import itertools
import operator
import sys

from zipfile import ZipFile

args_parser = argparse.ArgumentParser(
    description='Merge given CSV files into a single one.'
)
args_parser.add_argument(
    '--header',
    help='Comma separated field names; '
    'if missing determines the header from input files.',
)
args_parser.add_argument(
    '--zip_input',
    help='Treat files as ZIP archives containing CSV files to merge.',
    action="store_true",
)
args_parser.add_argument(
    '--key_field',
    help='The name of the field by which the rows should be sorted. '
    'Must be in the field names. '
    'Will be the first field in the output. '
    'All input files must be sorted by that field.',
)
args_parser.add_argument(
    '--output',
    help='Output file for merged CSV.',
    default='-',
    type=argparse.FileType('w'),
)
args_parser.add_argument('files', nargs=argparse.REMAINDER)


def dict_reader(csvfile):
    """Return a csv.DictReader over csvfile (',' delimiter, '|' quote char)."""
    return csv.DictReader(csvfile, delimiter=',', quotechar='|')


def _open_readers(paths, zip_input, stack):
    """Open all inputs and return one DictReader per CSV source.

    Args:
        paths: Input file names from the command line.
        zip_input: If true, every path is a ZIP archive and each entry whose
            name ends with '.uau' is read as a CSV file; otherwise every path
            is itself a CSV file.
        stack: A contextlib.ExitStack that takes ownership of every file
            opened here, so the caller controls when they are closed.

    Returns:
        A list of csv.DictReader objects, in input order.
    """
    readers = []
    for path in paths:
        if not zip_input:
            # newline='' lets the csv module do its own line-ending handling,
            # as the csv documentation requires for file objects.
            csvfile = stack.enter_context(open(path, 'r', newline=''))
            readers.append(dict_reader(csvfile))
            continue
        # Rows are read lazily, so the archive has to stay open until the
        # caller has consumed every reader.
        archive = stack.enter_context(ZipFile(path))
        for entry in archive.namelist():
            if entry.endswith('.uau'):
                csvfile = stack.enter_context(
                    io.TextIOWrapper(archive.open(entry, 'r'), newline='')
                )
                readers.append(dict_reader(csvfile))
    return readers


def _union_fieldnames(readers):
    """Return the union of all readers' columns, in first-seen order."""
    headers = {}
    for reader in readers:
        # DictReader.fieldnames is None for a completely empty input file.
        for fieldname in reader.fieldnames or ():
            headers[fieldname] = ""
    return list(headers)


def main(argv=None):
    """Merge the CSV inputs named on the command line into --output.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:].

    Raises:
        SystemExit: If the arguments are invalid, including a --key_field
            that is not one of the output field names.
    """
    args = args_parser.parse_args(argv)
    try:
        with contextlib.ExitStack() as stack:
            csv_readers = _open_readers(args.files, args.zip_input, stack)

            if args.header:
                fieldnames = args.header.split(',')
            else:
                # Build union of all columns from source files:
                fieldnames = _union_fieldnames(csv_readers)

            # By default chain the csv readers together so that the resulting
            # output is the concatenation of the rows from each of them:
            all_rows = itertools.chain.from_iterable(csv_readers)

            key_field = args.key_field
            if csv_readers and key_field:
                if key_field not in fieldnames:
                    # Not an assert: asserts are stripped under `python -O`.
                    args_parser.error(
                        "--key_field {} not found, must be one of {}\n".format(
                            key_field, ",".join(fieldnames)
                        )
                    )
                # Make the key field the first field in the output
                fieldnames.insert(0, fieldnames.pop(fieldnames.index(key_field)))
                # Create an iterable that performs a lazy merge sort on the
                # csv readers sorting the rows by the key field.
                all_rows = heapq.merge(
                    *csv_readers, key=operator.itemgetter(key_field)
                )

            # Write all rows from the input files to the output. The explicit
            # quoting argument overrides the 'unix' dialect's own quoting, so
            # only its other settings (e.g. '\n' line terminator) apply.
            writer = csv.DictWriter(
                args.output,
                delimiter=',',
                quotechar='|',
                quoting=csv.QUOTE_MINIMAL,
                dialect='unix',
                fieldnames=fieldnames,
            )
            writer.writeheader()

            # Read all the rows from the input and write them to the output
            # in the correct order:
            for row in all_rows:
                writer.writerow(row)
    finally:
        # argparse.FileType maps '-' to sys.stdout, which must stay open.
        if args.output is not sys.stdout:
            args.output.close()


if __name__ == '__main__':
    main()