scripts/hiddenapi/merge_csv.py - LeafOS-Project/android_build_soong - Gitiles

 #!/usr/bin/env python
 #
 # Copyright (C) 2018 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Merge multiple CSV files, possibly with different columns.
 """

 import argparse
 import csv
 import io
 import heapq
 import itertools
 import operator

 from zipfile import ZipFile

 # Command-line interface: optional flags followed by the input files.
 args_parser = argparse.ArgumentParser(
     description='Merge given CSV files into a single one.',
 )

 # Explicit column list; when absent the header is derived from the inputs.
 args_parser.add_argument(
     '--header',
     help=(
         'Comma separated field names; '
         'if missing determines the header from input files.'
     ),
 )

 # Switches the inputs from plain CSV files to ZIP archives of CSV files.
 args_parser.add_argument(
     '--zip_input',
     action='store_true',
     help='Treat files as ZIP archives containing CSV files to merge.',
 )

 # Optional sort key; enables the merge-sort path further down the script.
 args_parser.add_argument(
     '--key_field',
     help=(
         'The name of the field by which the rows should be sorted. '
         'Must be in the field names. '
         'Will be the first field in the output. '
         'All input files must be sorted by that field.'
     ),
 )

 # argparse.FileType treats the default '-' as standard output.
 args_parser.add_argument(
     '--output',
     type=argparse.FileType('w'),
     default='-',
     help='Output file for merged CSV.',
 )

 # Every remaining command-line argument is collected as an input file.
 args_parser.add_argument('files', nargs=argparse.REMAINDER)

 args = args_parser.parse_args()


 def dict_reader(csvfile):
     """Wrap an open text file in a csv.DictReader using this tool's format.

     Args:
         csvfile: An iterable of text lines (typically an open file) whose
             first line holds the column names.

     Returns:
         A csv.DictReader reading comma-delimited fields quoted with '|'.
     """
     reader = csv.DictReader(csvfile, quotechar='|', delimiter=',')
     return reader


 # Create one csv.DictReader per input: either each file given on the command
 # line, or each matching entry inside each ZIP archive given on the command
 # line.
 csv_readers = []
 if not args.zip_input:
     for file in args.files:
         csv_readers.append(dict_reader(open(file, 'r')))
 else:
     for file in args.files:
         with ZipFile(file) as zipfile:
             for entry in zipfile.namelist():
                 # Only archive entries with the '.uau' suffix are merged.
                 if entry.endswith('.uau'):
                     csv_readers.append(
                         dict_reader(io.TextIOWrapper(zipfile.open(entry, 'r')))
                     )

 if args.header:
     fieldnames = args.header.split(',')
 else:
     headers = {}
     # Build union of all columns from source files; a dict is used so that
     # the columns stay in first-seen order without duplicates.
     for reader in csv_readers:
         # DictReader.fieldnames is None when the input is empty; treat that
         # as contributing no columns instead of failing on iteration.
         for fieldname in reader.fieldnames or ():
             headers[fieldname] = ""
     fieldnames = list(headers)

 # By default chain the csv readers together so that the resulting output is
 # the concatenation of the rows from each of them:
 all_rows = itertools.chain.from_iterable(csv_readers)

 if csv_readers:
     keyField = args.key_field
     if keyField:
         # Report a bad --key_field as a usage error. An assert would be
         # stripped when Python runs with -O, silently skipping the check.
         if keyField not in fieldnames:
             args_parser.error(
                 "--key_field {} not found, must be one of {}\n".format(
                     keyField, ",".join(fieldnames)
                 )
             )
         # Make the key field the first field in the output
         keyFieldIndex = fieldnames.index(keyField)
         fieldnames.insert(0, fieldnames.pop(keyFieldIndex))
         # Create an iterable that performs a lazy merge sort on the csv readers
         # sorting the rows by the key field.
         all_rows = heapq.merge(*csv_readers, key=operator.itemgetter(keyField))

 # Write all rows from the input files to the output:
 writer = csv.DictWriter(
     args.output,
     delimiter=',',
     quotechar='|',
     quoting=csv.QUOTE_MINIMAL,
     dialect='unix',
     fieldnames=fieldnames,
 )
 writer.writeheader()

 # Read all the rows from the input and write them to the output in the correct
 # order:
 for row in all_rows:
     writer.writerow(row)
	#!/usr/bin/env python
	#
	# Copyright (C) 2018 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Merge multiple CSV files, possibly with different columns.
	"""

	import argparse
	import csv
	import io
	import heapq
	import itertools
	import operator

	from zipfile import ZipFile

	# Command-line interface: optional flags followed by the input files.
	args_parser = argparse.ArgumentParser(
	    description='Merge given CSV files into a single one.',
	)

	# Explicit column list; when absent the header is derived from the inputs.
	args_parser.add_argument(
	    '--header',
	    help=(
	        'Comma separated field names; '
	        'if missing determines the header from input files.'
	    ),
	)

	# Switches the inputs from plain CSV files to ZIP archives of CSV files.
	args_parser.add_argument(
	    '--zip_input',
	    action='store_true',
	    help='Treat files as ZIP archives containing CSV files to merge.',
	)

	# Optional sort key; enables the merge-sort path further down the script.
	args_parser.add_argument(
	    '--key_field',
	    help=(
	        'The name of the field by which the rows should be sorted. '
	        'Must be in the field names. '
	        'Will be the first field in the output. '
	        'All input files must be sorted by that field.'
	    ),
	)

	# argparse.FileType treats the default '-' as standard output.
	args_parser.add_argument(
	    '--output',
	    type=argparse.FileType('w'),
	    default='-',
	    help='Output file for merged CSV.',
	)

	# Every remaining command-line argument is collected as an input file.
	args_parser.add_argument('files', nargs=argparse.REMAINDER)

	args = args_parser.parse_args()


	def dict_reader(csvfile):
	    """Wrap an open text file in a csv.DictReader using this tool's format.

	    Args:
	        csvfile: An iterable of text lines (typically an open file) whose
	            first line holds the column names.

	    Returns:
	        A csv.DictReader reading comma-delimited fields quoted with '|'.
	    """
	    # The quote character must be exactly one character; the csv module
	    # rejects a two-character value such as a backslash followed by '|'.
	    return csv.DictReader(csvfile, delimiter=',', quotechar='|')


	# Create one csv.DictReader per input: either each file given on the command
	# line, or each matching entry inside each ZIP archive given on the command
	# line.
	csv_readers = []
	if not args.zip_input:
	    for file in args.files:
	        csv_readers.append(dict_reader(open(file, 'r')))
	else:
	    for file in args.files:
	        with ZipFile(file) as zipfile:
	            for entry in zipfile.namelist():
	                # Only archive entries with the '.uau' suffix are merged.
	                if entry.endswith('.uau'):
	                    csv_readers.append(
	                        dict_reader(io.TextIOWrapper(zipfile.open(entry, 'r')))
	                    )

	if args.header:
	    fieldnames = args.header.split(',')
	else:
	    headers = {}
	    # Build union of all columns from source files; a dict is used so that
	    # the columns stay in first-seen order without duplicates.
	    for reader in csv_readers:
	        # DictReader.fieldnames is None when the input is empty; treat that
	        # as contributing no columns instead of failing on iteration.
	        for fieldname in reader.fieldnames or ():
	            headers[fieldname] = ""
	    fieldnames = list(headers)

	# By default chain the csv readers together so that the resulting output is
	# the concatenation of the rows from each of them:
	all_rows = itertools.chain.from_iterable(csv_readers)

	if csv_readers:
	    keyField = args.key_field
	    if keyField:
	        # Report a bad --key_field as a usage error. An assert would be
	        # stripped when Python runs with -O, silently skipping the check.
	        if keyField not in fieldnames:
	            args_parser.error(
	                "--key_field {} not found, must be one of {}\n".format(
	                    keyField, ",".join(fieldnames)
	                )
	            )
	        # Make the key field the first field in the output
	        keyFieldIndex = fieldnames.index(keyField)
	        fieldnames.insert(0, fieldnames.pop(keyFieldIndex))
	        # Create an iterable that performs a lazy merge sort on the csv readers
	        # sorting the rows by the key field.
	        all_rows = heapq.merge(*csv_readers, key=operator.itemgetter(keyField))

	# Write all rows from the input files to the output. The quote character
	# must be exactly one character, so it is a bare '|'.
	writer = csv.DictWriter(
	    args.output,
	    delimiter=',',
	    quotechar='|',
	    quoting=csv.QUOTE_MINIMAL,
	    dialect='unix',
	    fieldnames=fieldnames,
	)
	writer.writeheader()

	# Read all the rows from the input and write them to the output in the correct
	# order:
	for row in all_rows:
	    writer.writerow(row)