tools/runtime_memusage/symbol_trace_info.py - LeafOS-Project/android_art - Gitiles

 #!/usr/bin/env python
 #
 # Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Outputs quantitative information about Address Sanitizer traces."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 from collections import Counter
 from datetime import datetime
 import argparse
 import bisect
 import os
 import re


 def find_match(list_substrings, big_string):
     """Returns the index of the first keyword that occurs in a trace.

     Keywords are tried in list order. When none of them occurs in
     big_string, the index of the "Uncategorized" entry is returned
     instead; list.index raises ValueError if that entry is absent.
     """
     first_hit = next((position
                       for position, keyword in enumerate(list_substrings)
                       if keyword in big_string),
                      None)
     if first_hit is None:
         return list_substrings.index("Uncategorized")
     return first_hit


 def absolute_to_relative(data_lists, symbol_traces):
     """Converts plot points to relative times and Dex File offsets in place.

     Every entry of data_lists["plot_list"] starts as
     [time_string, hex_address]. Entries that pass the offset and time
     filters become [time_ms, dex_offset, category, hex_address]. Every
     other entry is overwritten with four None values, and the entries at
     the same index in symbol_traces and data_lists["cat_list"] are set to
     None.

     Args:
         data_lists: dictionary providing "offsets", "times", "plot_list",
             "dex_ends_list" (sorted (start, size) pairs) and "cat_list".
         symbol_traces: list of traces, index-aligned with
             data_lists["plot_list"].
     """
     plot_list = data_lists["plot_list"]
     # Nothing to convert; also avoids indexing the first element below.
     if not plot_list:
         return

     offsets = data_lists["offsets"]
     time_offsets = data_lists["times"]

     # Format of time provided by logcat
     time_format_str = "%H:%M:%S.%f"
     first_access_time = datetime.strptime(plot_list[0][0], time_format_str)

     # Loop invariant: split the (start, size) pairs once. With no Dex Files
     # at all, every lookup below yields index -1 and the point is dropped.
     if data_lists["dex_ends_list"]:
         dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"])
     else:
         dex_start_list, dex_size_list = (), ()

     for ind, elem in enumerate(plot_list):
         elem_date_time = datetime.strptime(elem[0], time_format_str)
         # Shift time values so that first access is at time 0 milliseconds
         elapsed = elem_date_time - first_access_time
         elem[0] = int(elapsed.total_seconds() * 1000)
         address_access = int(elem[1], 16)
         # Find the highest Dex File starting address that does not exceed
         # address_access
         dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1

         keep = False
         # Index -1 means the address lies below every Dex File; without
         # this check it would silently wrap around to the last Dex File.
         if dex_file_ind >= 0:
             dex_offset = address_access - dex_start_list[dex_file_ind]
             # Assumes that offsets is already sorted and constrains the
             # offset to be within range of the dex file
             max_offset = min(offsets[1], dex_size_list[dex_file_ind])
             keep = (offsets[0] <= dex_offset < max_offset and
                     time_offsets[0] <= elem[0] < time_offsets[1])

         if keep:
             elem.insert(1, dex_offset)
             # Category that a data point belongs to
             elem.insert(2, data_lists["cat_list"][ind])
         else:
             # Nullify data that does not meet the filter criteria
             elem[:] = 4 * [None]
             symbol_traces[ind] = None
             data_lists["cat_list"][ind] = None


 def print_category_info(cat_split, outname, out_dir_name, title):
     """Summarizes a group of traces on stdout and dumps them to a file.

     Prints the title, the number of distinct traces, the total number of
     traces and a histogram of how often each appearance count occurs.
     The distinct traces are then written, most frequent first, to the
     file outname inside out_dir_name.
     """
     ranked_traces = Counter(cat_split).most_common()
     appearance_counts = [count for _, count in ranked_traces]

     print("-" * 53)
     print(title)
     print("\tNumber of distinct traces: " + str(len(ranked_traces)))
     print("\tSum of trace counts: " + str(sum(appearance_counts)))
     print("\n\tCount: How many traces appeared with count\n\t", end="")
     print(Counter(appearance_counts))

     destination = os.path.join(out_dir_name, outname)
     with open(destination, "w") as output_file:
         for trace_text, count in ranked_traces:
             heading = "\n\nNumber of times appeared: " + str(count) + "\n"
             output_file.write(heading)
             output_file.write(trace_text.strip())


 def print_categories(categories, symbol_file_split, out_dir_name):
     """Reports trace statistics for every keyword category.

     Entries of symbol_file_split that are None are ignored. For each
     keyword after the first element of categories, the traces containing
     it and the traces not containing it are reported separately. All
     remaining traces are then reported together and, when at least one
     keyword was given, so are the traces that contain none of the entries
     of categories.
     """
     kept_traces = [trace for trace in symbol_file_split if trace is not None]

     # One pair of reports per keyword; the first entry of categories is
     # not treated as a keyword here.
     for cat_num, cat_name in enumerate(categories[1:], 1):
         print("\nCategory #%d" % cat_num)
         with_keyword = []
         without_keyword = []
         for trace in kept_traces:
             if cat_name in trace:
                 with_keyword.append(trace)
             else:
                 without_keyword.append(trace)
         cat_file_name = cat_name.lower() + "cat_output"
         print_category_info(with_keyword, cat_file_name, out_dir_name,
                             "Traces containing: " + cat_name)
         print_category_info(without_keyword, "non" + cat_file_name,
                             out_dir_name,
                             "Traces not containing: " + cat_name)

     # All traces (including uncategorized) together
     print_category_info(kept_traces, "allcat_output", out_dir_name,
                         "All traces together:")

     # Traces containing none of the keywords; only meaningful when
     # categories were passed in
     if len(categories) > 1:
         uncategorized = [trace for trace in kept_traces
                          if not any(cat_name in trace
                                     for cat_name in categories)]
         print_category_info(uncategorized, "noncat_output", out_dir_name,
                             "Uncategorized calls")


 def is_directory(path_name):
     """Validates that path_name names an existing directory.

     Returns path_name unchanged when it is a directory and raises
     argparse.ArgumentTypeError otherwise, so it can be used as an
     argparse type callback.
     """
     if os.path.isdir(path_name):
         return path_name
     raise argparse.ArgumentTypeError("%s is not a directory" % (path_name))


 def parse_args(argv):
     """Parses arguments passed in.

     Args:
         argv: list of argument strings; None makes argparse read the
             process command line.

     Returns:
         The argparse namespace. The file arguments are already opened for
         reading, offsets and times are two-element lists of floats, and
         out_dir_name has been validated by is_directory.
     """
     parser = argparse.ArgumentParser()
     # argparse runs string defaults through the type callback, so the
     # default must itself be a real directory. An empty string would be
     # rejected by is_directory and make -d effectively mandatory.
     parser.add_argument("-d", action="store",
                         default=os.curdir, dest="out_dir_name",
                         type=is_directory,
                         help="Output Directory")
     parser.add_argument("--dex-file", action="store",
                         default=None, dest="dex_file",
                         type=argparse.FileType("r"),
                         help="Baksmali Dex File Dump")
     parser.add_argument("--offsets", action="store", nargs=2,
                         default=[float(0), float("inf")],
                         dest="offsets",
                         metavar="OFFSET",
                         type=float,
                         help="Filters out accesses not between provided"
                              " offsets if provided. Can provide 'inf'"
                              " for infinity")
     parser.add_argument("--times", action="store", nargs=2,
                         default=[float(0), float("inf")],
                         dest="times",
                         metavar="TIME",
                         type=float,
                         help="Filters out accesses not between provided"
                              " time offsets if provided. Can provide 'inf'"
                              " for infinity")
     parser.add_argument("sanitizer_trace", action="store",
                         type=argparse.FileType("r"),
                         help="File containing sanitizer traces filtered by "
                              "prune_sanitizer_output.py")
     parser.add_argument("symbol_trace", action="store",
                         type=argparse.FileType("r"),
                         help="File containing symbolized traces that match "
                              "sanitizer_trace")
     parser.add_argument("dex_starts", action="store",
                         type=argparse.FileType("r"),
                         help="File containing starting addresses of Dex Files")
     parser.add_argument("categories", action="store", nargs="*",
                         help="Keywords expected to show in large amounts of"
                              " symbolized traces")

     return parser.parse_args(argv)


 def get_dex_offset_data(line, dex_file_item):
     """Parses one offset line of a dex file dump.

     Returns:
         A pair (offset, (dex_file_item, data)): offset is the hexadecimal
         text before the first ":" converted to an int, and data is the
         stripped text of the second "|"-separated field of the line.
     """
     colon_pos = line.find(":")
     offset = int(line[:colon_pos], 16)
     fields = line.split("|")
     return offset, (dex_file_item, fields[1].strip())


 def read_data(parsed_argv):
     """Reads data from the file arguments and parses it into lists.

     Args:
         parsed_argv: namespace with the attributes set up by parse_args.
             Its file arguments are open file objects; each one is closed
             by this function.

     Returns:
         A tuple (data_lists, categories, symbol_file_split).
         data_lists maps "offsets", "times", "plot_list", "cat_list" and
         "dex_ends_list" to the parsed lists; "offset_list" and
         "offset_data" are added when a dex file dump with at least one
         usable line was supplied.
         categories is "Uncategorized" followed by the requested keywords.
         symbol_file_split holds the text following each "Stack Trace"
         marker of the symbolized trace file.
     """
     # Using a dictionary to establish relation between lists added
     data_lists = {}
     # Makes sure each trace maps to some category. A new list is built so
     # the parsed arguments are not modified.
     categories = ["Uncategorized"] + list(parsed_argv.categories)

     # sorted() copies, so argparse's default lists are never reordered.
     data_lists["offsets"] = sorted(parsed_argv.offsets)
     data_lists["times"] = sorted(parsed_argv.times)

     # Each with block closes its file even when reading raises.
     with parsed_argv.sanitizer_trace as sanitizer_file:
         logcat_file_data = sanitizer_file.readlines()

     with parsed_argv.symbol_trace as symbol_file:
         # Removes text before first trace
         symbol_file_split = symbol_file.read().split("Stack Trace")[1:]

     with parsed_argv.dex_starts as dex_starts_file:
         dex_start_file_data = dex_starts_file.readlines()

     if parsed_argv.dex_file is not None:
         with parsed_argv.dex_file as dex_file:
             dex_file_data = dex_file.read()
         # Splits baksmali dump by each item
         item_split = [chunk.splitlines()
                       for chunk in re.split(r"\|\[[0-9]+\] ", dex_file_data)]
         # Splits each item by line and pairs every offset with the data
         # associated with that line
         offset_pairs = [get_dex_offset_data(line, item[0])
                         for item in item_split
                         for line in item[1:]
                         if re.search("[0-9a-f]{6}:", line) is not None and
                         "|" in line]
         # zip(*[]) has nothing to unpack, so the lookup tables are only
         # published when at least one line matched.
         if offset_pairs:
             offset_list, offset_data = zip(*offset_pairs)
             data_lists["offset_list"] = offset_list
             data_lists["offset_data"] = offset_data

     # Each element is a list of time and address accessed, taken from the
     # whitespace-separated fields 1 and 11 of a matching line
     data_lists["plot_list"] = [
         [token for position, token in enumerate(line.split())
          if position in (1, 11)]
         for line in logcat_file_data
         if "use-after-poison" in line or "unknown-crash" in line
     ]
     # Contains a mapping between traces and the category they belong to
     # based on arguments
     data_lists["cat_list"] = [categories[find_match(categories, trace)]
                               for trace in symbol_file_split]

     # Contains (starting address, size) of all dex files to calculate dex
     # offsets
     dex_ends_list = []
     for line in dex_start_file_data:
         if "RegisterDexFile" in line:
             fields = line.split()
             dex_ends_list.append((int(fields[9], 16), int(fields[12])))
     # Dex File Starting addresses must be sorted because bisect requires
     # sorted lists.
     dex_ends_list.sort()
     data_lists["dex_ends_list"] = dex_ends_list

     return data_lists, categories, symbol_file_split


 def main():
     """Takes in trace information and outputs details about them.

     Writes one time_output_<index>.dat file per category into the output
     directory, each listing the data points of that category, and then
     prints the per-category trace statistics.
     """
     parsed_argv = parse_args(None)
     data_lists, categories, symbol_file_split = read_data(parsed_argv)

     # Formats plot_list such that each element is a data point
     absolute_to_relative(data_lists, symbol_file_split)

     # Item details are only available when a dex file dump was parsed
     has_item_data = "offset_list" in data_lists
     header = ("# Time, Dex File Offset_10, Dex File Offset_16,"
               " Address, Item Accessed, Item Member Accessed"
               " Unaligned\n")

     for file_ext, cat_name in enumerate(categories):
         out_file_name = os.path.join(
             parsed_argv.out_dir_name,
             "time_output_" + str(file_ext) + ".dat")
         with open(out_file_name, "w") as output_file:
             output_file.write("# Category: " + cat_name + "\n")
             output_file.write(header)
             for time, dex_offset, category, address in data_lists["plot_list"]:
                 if category != cat_name:
                     continue
                 output_file.write("%s %s #%s %s" % (str(time),
                                                     str(dex_offset),
                                                     hex(dex_offset),
                                                     str(address)))
                 if has_item_data:
                     # Closest dump offset at or below the accessed offset
                     dex_offset_index = bisect.bisect(
                         data_lists["offset_list"], dex_offset) - 1
                     aligned_dex_offset = (
                         data_lists["offset_list"][dex_offset_index])
                     dex_offset_data = (
                         data_lists["offset_data"][dex_offset_index])
                     unaligned = aligned_dex_offset != dex_offset
                     output_file.write(" " + "|".join(dex_offset_data) +
                                       " " + str(unaligned))
                 output_file.write("\n")
     print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)

 # Run the analysis only when this file is executed as a script.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python
	#
	# Copyright (C) 2017 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Outputs quantitative information about Address Sanitizer traces."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	from collections import Counter
	from datetime import datetime
	import argparse
	import bisect
	import os
	import re


	def find_match(list_substrings, big_string):
	    """Returns the index of the first keyword that occurs in a trace.

	    Keywords are tried in list order. When none of them occurs in
	    big_string, the index of the "Uncategorized" entry is returned
	    instead; list.index raises ValueError if that entry is absent.
	    """
	    for position, keyword in enumerate(list_substrings):
	        if keyword in big_string:
	            return position
	    return list_substrings.index("Uncategorized")


	def absolute_to_relative(data_lists, symbol_traces):
	    """Converts plot points to relative times and Dex File offsets in place.

	    Every entry of data_lists["plot_list"] starts as
	    [time_string, hex_address]. Entries that pass the offset and time
	    filters become [time_ms, dex_offset, category, hex_address]. Every
	    other entry is overwritten with four None values, and the entries at
	    the same index in symbol_traces and data_lists["cat_list"] are set to
	    None.

	    Args:
	        data_lists: dictionary providing "offsets", "times", "plot_list",
	            "dex_ends_list" (sorted (start, size) pairs) and "cat_list".
	        symbol_traces: list of traces, index-aligned with
	            data_lists["plot_list"].
	    """
	    plot_list = data_lists["plot_list"]
	    # Nothing to convert; also avoids indexing the first element below.
	    if not plot_list:
	        return

	    offsets = data_lists["offsets"]
	    time_offsets = data_lists["times"]

	    # Format of time provided by logcat
	    time_format_str = "%H:%M:%S.%f"
	    first_access_time = datetime.strptime(plot_list[0][0], time_format_str)

	    # Loop invariant: split the (start, size) pairs once. With no Dex Files
	    # at all, every lookup below yields index -1 and the point is dropped.
	    if data_lists["dex_ends_list"]:
	        dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"])
	    else:
	        dex_start_list, dex_size_list = (), ()

	    for ind, elem in enumerate(plot_list):
	        elem_date_time = datetime.strptime(elem[0], time_format_str)
	        # Shift time values so that first access is at time 0 milliseconds
	        elapsed = elem_date_time - first_access_time
	        elem[0] = int(elapsed.total_seconds() * 1000)
	        address_access = int(elem[1], 16)
	        # Find the highest Dex File starting address that does not exceed
	        # address_access
	        dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1

	        keep = False
	        # Index -1 means the address lies below every Dex File; without
	        # this check it would silently wrap around to the last Dex File.
	        if dex_file_ind >= 0:
	            dex_offset = address_access - dex_start_list[dex_file_ind]
	            # Assumes that offsets is already sorted and constrains the
	            # offset to be within range of the dex file
	            max_offset = min(offsets[1], dex_size_list[dex_file_ind])
	            keep = (offsets[0] <= dex_offset < max_offset and
	                    time_offsets[0] <= elem[0] < time_offsets[1])

	        if keep:
	            elem.insert(1, dex_offset)
	            # Category that a data point belongs to
	            elem.insert(2, data_lists["cat_list"][ind])
	        else:
	            # Nullify data that does not meet the filter criteria
	            elem[:] = 4 * [None]
	            symbol_traces[ind] = None
	            data_lists["cat_list"][ind] = None


	def print_category_info(cat_split, outname, out_dir_name, title):
	    """Summarizes a group of traces on stdout and dumps them to a file.

	    Prints the title, the number of distinct traces, the total number of
	    traces and a histogram of how often each appearance count occurs.
	    The distinct traces are then written, most frequent first, to the
	    file outname inside out_dir_name.
	    """
	    ranked_traces = Counter(cat_split).most_common()
	    appearance_counts = [count for _, count in ranked_traces]

	    print("-" * 53)
	    print(title)
	    print("\tNumber of distinct traces: " + str(len(ranked_traces)))
	    print("\tSum of trace counts: " + str(sum(appearance_counts)))
	    print("\n\tCount: How many traces appeared with count\n\t", end="")
	    print(Counter(appearance_counts))

	    destination = os.path.join(out_dir_name, outname)
	    with open(destination, "w") as output_file:
	        for trace_text, count in ranked_traces:
	            heading = "\n\nNumber of times appeared: " + str(count) + "\n"
	            output_file.write(heading)
	            output_file.write(trace_text.strip())


	def print_categories(categories, symbol_file_split, out_dir_name):
	    """Reports trace statistics for every keyword category.

	    Entries of symbol_file_split that are None are ignored. For each
	    keyword after the first element of categories, the traces containing
	    it and the traces not containing it are reported separately. All
	    remaining traces are then reported together and, when at least one
	    keyword was given, so are the traces that contain none of the entries
	    of categories.
	    """
	    symbol_file_split = [trace for trace in symbol_file_split
	                         if trace is not None]
	    # Info of traces containing a call to current category
	    for cat_num, cat_name in enumerate(categories[1:]):
	        print("\nCategory #%d" % (cat_num + 1))
	        cat_split = [trace for trace in symbol_file_split
	                     if cat_name in trace]
	        cat_file_name = cat_name.lower() + "cat_output"
	        print_category_info(cat_split, cat_file_name, out_dir_name,
	                            "Traces containing: " + cat_name)
	        noncat_split = [trace for trace in symbol_file_split
	                        if cat_name not in trace]
	        print_category_info(noncat_split, "non" + cat_file_name,
	                            out_dir_name,
	                            "Traces not containing: " + cat_name)

	    # All traces (including uncategorized) together
	    print_category_info(symbol_file_split, "allcat_output",
	                        out_dir_name,
	                        "All traces together:")
	    # Traces containing none of the keywords; only meaningful when
	    # categories were passed in
	    if len(categories) > 1:
	        noncat_split = [trace for trace in symbol_file_split
	                        if all(cat_name not in trace
	                               for cat_name in categories)]
	        print_category_info(noncat_split, "noncat_output",
	                            out_dir_name,
	                            "Uncategorized calls")


	def is_directory(path_name):
	    """Validates that path_name names an existing directory.

	    Returns path_name unchanged when it is a directory and raises
	    argparse.ArgumentTypeError otherwise, so it can be used as an
	    argparse type callback.
	    """
	    if not os.path.isdir(path_name):
	        dir_error = "%s is not a directory" % (path_name)
	        raise argparse.ArgumentTypeError(dir_error)
	    return path_name


	def parse_args(argv):
	    """Parses arguments passed in.

	    Args:
	        argv: list of argument strings; None makes argparse read the
	            process command line.

	    Returns:
	        The argparse namespace. The file arguments are already opened for
	        reading, offsets and times are two-element lists of floats, and
	        out_dir_name has been validated by is_directory.
	    """
	    parser = argparse.ArgumentParser()
	    # argparse runs string defaults through the type callback, so the
	    # default must itself be a real directory. An empty string would be
	    # rejected by is_directory and make -d effectively mandatory.
	    parser.add_argument("-d", action="store",
	                        default=os.curdir, dest="out_dir_name",
	                        type=is_directory,
	                        help="Output Directory")
	    parser.add_argument("--dex-file", action="store",
	                        default=None, dest="dex_file",
	                        type=argparse.FileType("r"),
	                        help="Baksmali Dex File Dump")
	    parser.add_argument("--offsets", action="store", nargs=2,
	                        default=[float(0), float("inf")],
	                        dest="offsets",
	                        metavar="OFFSET",
	                        type=float,
	                        help="Filters out accesses not between provided"
	                             " offsets if provided. Can provide 'inf'"
	                             " for infinity")
	    parser.add_argument("--times", action="store", nargs=2,
	                        default=[float(0), float("inf")],
	                        dest="times",
	                        metavar="TIME",
	                        type=float,
	                        help="Filters out accesses not between provided"
	                             " time offsets if provided. Can provide 'inf'"
	                             " for infinity")
	    parser.add_argument("sanitizer_trace", action="store",
	                        type=argparse.FileType("r"),
	                        help="File containing sanitizer traces filtered by "
	                             "prune_sanitizer_output.py")
	    parser.add_argument("symbol_trace", action="store",
	                        type=argparse.FileType("r"),
	                        help="File containing symbolized traces that match "
	                             "sanitizer_trace")
	    parser.add_argument("dex_starts", action="store",
	                        type=argparse.FileType("r"),
	                        help="File containing starting addresses of Dex Files")
	    parser.add_argument("categories", action="store", nargs="*",
	                        help="Keywords expected to show in large amounts of"
	                             " symbolized traces")

	    return parser.parse_args(argv)


	def get_dex_offset_data(line, dex_file_item):
	    """Parses one offset line of a dex file dump.

	    Returns:
	        A pair (offset, (dex_file_item, data)): offset is the hexadecimal
	        text before the first ":" converted to an int, and data is the
	        stripped text of the second "|"-separated field of the line.
	    """
	    offset = int(line[:line.find(":")], 16)
	    # The separator is a bare "|"; a backslash in front of it would be
	    # searched for literally and never match.
	    data = line.split("|")[1].strip()
	    return offset, (dex_file_item, data)


	def read_data(parsed_argv):
	    """Reads data from the file arguments and parses it into lists.

	    Args:
	        parsed_argv: namespace with the attributes set up by parse_args.
	            Its file arguments are open file objects; each one is closed
	            by this function.

	    Returns:
	        A tuple (data_lists, categories, symbol_file_split).
	        data_lists maps "offsets", "times", "plot_list", "cat_list" and
	        "dex_ends_list" to the parsed lists; "offset_list" and
	        "offset_data" are added when a dex file dump with at least one
	        usable line was supplied.
	        categories is "Uncategorized" followed by the requested keywords.
	        symbol_file_split holds the text following each "Stack Trace"
	        marker of the symbolized trace file.
	    """
	    # Using a dictionary to establish relation between lists added
	    data_lists = {}
	    # Makes sure each trace maps to some category. A new list is built so
	    # the parsed arguments are not modified.
	    categories = ["Uncategorized"] + list(parsed_argv.categories)

	    # sorted() copies, so argparse's default lists are never reordered.
	    data_lists["offsets"] = sorted(parsed_argv.offsets)
	    data_lists["times"] = sorted(parsed_argv.times)

	    # Each with block closes its file even when reading raises.
	    with parsed_argv.sanitizer_trace as sanitizer_file:
	        logcat_file_data = sanitizer_file.readlines()

	    with parsed_argv.symbol_trace as symbol_file:
	        # Removes text before first trace
	        symbol_file_split = symbol_file.read().split("Stack Trace")[1:]

	    with parsed_argv.dex_starts as dex_starts_file:
	        dex_start_file_data = dex_starts_file.readlines()

	    if parsed_argv.dex_file is not None:
	        with parsed_argv.dex_file as dex_file:
	            dex_file_data = dex_file.read()
	        # Splits baksmali dump by each item
	        item_split = [chunk.splitlines()
	                      for chunk in re.split(r"\|\[[0-9]+\] ", dex_file_data)]
	        # Splits each item by line and pairs every offset with the data
	        # associated with that line
	        offset_pairs = [get_dex_offset_data(line, item[0])
	                        for item in item_split
	                        for line in item[1:]
	                        if re.search("[0-9a-f]{6}:", line) is not None and
	                        "|" in line]
	        # zip(*[]) has nothing to unpack, so the lookup tables are only
	        # published when at least one line matched.
	        if offset_pairs:
	            offset_list, offset_data = zip(*offset_pairs)
	            data_lists["offset_list"] = offset_list
	            data_lists["offset_data"] = offset_data

	    # Each element is a list of time and address accessed, taken from the
	    # whitespace-separated fields 1 and 11 of a matching line
	    data_lists["plot_list"] = [
	        [token for position, token in enumerate(line.split())
	         if position in (1, 11)]
	        for line in logcat_file_data
	        if "use-after-poison" in line or "unknown-crash" in line
	    ]
	    # Contains a mapping between traces and the category they belong to
	    # based on arguments
	    data_lists["cat_list"] = [categories[find_match(categories, trace)]
	                              for trace in symbol_file_split]

	    # Contains (starting address, size) of all dex files to calculate dex
	    # offsets
	    dex_ends_list = []
	    for line in dex_start_file_data:
	        if "RegisterDexFile" in line:
	            fields = line.split()
	            dex_ends_list.append((int(fields[9], 16), int(fields[12])))
	    # Dex File Starting addresses must be sorted because bisect requires
	    # sorted lists.
	    dex_ends_list.sort()
	    data_lists["dex_ends_list"] = dex_ends_list

	    return data_lists, categories, symbol_file_split


	def main():
	    """Takes in trace information and outputs details about them.

	    Writes one time_output_<index>.dat file per category into the output
	    directory, each listing the data points of that category, and then
	    prints the per-category trace statistics.
	    """
	    parsed_argv = parse_args(None)
	    data_lists, categories, symbol_file_split = read_data(parsed_argv)

	    # Formats plot_list such that each element is a data point
	    absolute_to_relative(data_lists, symbol_file_split)
	    for file_ext, cat_name in enumerate(categories):
	        out_file_name = os.path.join(parsed_argv.out_dir_name,
	                                     "time_output_" +
	                                     str(file_ext) +
	                                     ".dat")
	        with open(out_file_name, "w") as output_file:
	            output_file.write("# Category: " + cat_name + "\n")
	            output_file.write("# Time, Dex File Offset_10, Dex File Offset_16,"
	                              " Address, Item Accessed, Item Member Accessed"
	                              " Unaligned\n")
	            for time, dex_offset, category, address in data_lists["plot_list"]:
	                if category == cat_name:
	                    output_file.write(
	                        str(time) +
	                        " " +
	                        str(dex_offset) +
	                        " #" +
	                        hex(dex_offset) +
	                        " " +
	                        str(address))
	                    # Item details exist only when a dex dump was parsed
	                    if "offset_list" in data_lists:
	                        # Closest dump offset at or below the access
	                        dex_offset_index = bisect.bisect(
	                            data_lists["offset_list"],
	                            dex_offset) - 1
	                        aligned_dex_offset = (
	                            data_lists["offset_list"][dex_offset_index])
	                        dex_offset_data = (
	                            data_lists["offset_data"][dex_offset_index])
	                        # Item name and member are joined with a bare "|"
	                        output_file.write(
	                            " " +
	                            "|".join(dex_offset_data) +
	                            " " +
	                            str(aligned_dex_offset != dex_offset))
	                    output_file.write("\n")
	    print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)


	# Run the analysis only when this file is executed as a script.
	if __name__ == "__main__":
	    main()