# xref: /aosp_15_r20/build/soong/scripts/hiddenapi/merge_csv.py (revision 333d2b3687b3a337dbcca9d65000bca186795e39)
#!/usr/bin/env python
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Merge multiple CSV files, possibly with different columns.
"""

import argparse
import contextlib
import csv
import heapq
import io
import itertools
import operator

from zipfile import ZipFile

args_parser = argparse.ArgumentParser(
    description='Merge given CSV files into a single one.'
)
args_parser.add_argument(
    '--header',
    help='Comma separated field names; '
    'if missing determines the header from input files.',
)
args_parser.add_argument(
    '--zip_input',
    help='Treat files as ZIP archives containing CSV files to merge.',
    action='store_true',
)
args_parser.add_argument(
    '--key_field',
    help='The name of the field by which the rows should be sorted. '
    'Must be in the field names. '
    'Will be the first field in the output. '
    'All input files must be sorted by that field.',
)
args_parser.add_argument(
    '--output',
    help='Output file for merged CSV.',
    default='-',
    type=argparse.FileType('w'),
)
args_parser.add_argument('files', nargs=argparse.REMAINDER)


def dict_reader(csvfile):
    """Return a csv.DictReader over csvfile.

    Fields are separated by ',' and quoted with '|', matching the format
    produced by the writer in merge_csv().
    """
    return csv.DictReader(csvfile, delimiter=',', quotechar='|')


def _open_csv_readers(stack, files, zip_input):
    """Open every input and return one DictReader per CSV stream.

    Args:
        stack: a contextlib.ExitStack that takes ownership of every file,
            archive and archive member opened here, so that they stay open
            while the readers are consumed and are closed afterwards.
        files: paths of the input files.
        zip_input: if true, each path is a ZIP archive and every entry whose
            name ends with '.uau' is read as a CSV stream; other entries are
            skipped. Otherwise each path is read as a CSV file directly.

    Returns:
        A list of csv.DictReader objects, in input order.
    """
    readers = []
    for path in files:
        if not zip_input:
            # newline='' lets the csv module do its own newline handling, as
            # its documentation requires.
            csvfile = stack.enter_context(open(path, 'r', newline=''))
            readers.append(dict_reader(csvfile))
            continue
        archive = stack.enter_context(ZipFile(path))
        for entry in archive.namelist():
            if entry.endswith('.uau'):
                member = io.TextIOWrapper(archive.open(entry, 'r'), newline='')
                readers.append(dict_reader(stack.enter_context(member)))
    return readers


def _determine_fieldnames(header, csv_readers):
    """Return the list of output column names.

    If header is given it is split on ','. Otherwise the result is the union
    of the columns of all readers, in order of first appearance.
    """
    if header:
        return header.split(',')
    # A dict is used as an insertion-ordered set.
    columns = {}
    for reader in csv_readers:
        # fieldnames is None for an input that has no header row (for example
        # an empty file); such an input contributes no columns.
        for fieldname in reader.fieldnames or ():
            columns[fieldname] = ''
    return list(columns)


def merge_csv(csv_readers, output, header=None, key_field=None):
    """Merge the rows of csv_readers and write them as CSV to output.

    Without key_field the output is the concatenation of the rows of each
    reader, in order. With key_field the readers are lazily merge-sorted on
    that field (each input must already be sorted by it) and the key field
    becomes the first output column.

    Args:
        csv_readers: list of csv.DictReader objects to merge.
        output: writable text file object that receives the merged CSV.
        header: optional comma separated column names for the output; if
            missing the union of the input columns is used.
        key_field: optional name of the column to sort by.

    Raises:
        ValueError: if key_field is given, there is at least one reader, and
            key_field is not one of the output columns. csv.DictWriter also
            raises ValueError for a row containing a column that is not in
            the output columns.
    """
    fieldnames = _determine_fieldnames(header, csv_readers)

    # By default chain the csv readers together so that the resulting output
    # is the concatenation of the rows from each of them:
    all_rows = itertools.chain.from_iterable(csv_readers)

    if csv_readers and key_field:
        # Raised explicitly rather than asserted so that the check is not
        # stripped when running under "python -O".
        if key_field not in fieldnames:
            raise ValueError(
                "--key_field {} not found, must be one of {}\n".format(
                    key_field, ",".join(fieldnames)
                )
            )
        # Make the key field the first field in the output
        fieldnames.insert(0, fieldnames.pop(fieldnames.index(key_field)))
        # Create an iterable that performs a lazy merge sort on the csv
        # readers sorting the rows by the key field.
        all_rows = heapq.merge(*csv_readers, key=operator.itemgetter(key_field))

    # dialect='unix' supplies the '\n' line terminator; the explicit keyword
    # arguments override its quote character and quoting mode.
    writer = csv.DictWriter(
        output,
        delimiter=',',
        quotechar='|',
        quoting=csv.QUOTE_MINIMAL,
        dialect='unix',
        fieldnames=fieldnames,
    )
    writer.writeheader()
    # Columns missing from a row are written as empty strings.
    writer.writerows(all_rows)


def main():
    """Parse the command line and merge the given files into --output."""
    args = args_parser.parse_args()
    with contextlib.ExitStack() as stack:
        csv_readers = _open_csv_readers(stack, args.files, args.zip_input)
        # args.output is deliberately not closed here: it may be sys.stdout,
        # and it is flushed when the interpreter exits.
        merge_csv(
            csv_readers,
            args.output,
            header=args.header,
            key_field=args.key_field,
        )


if __name__ == '__main__':
    main()