diff options
| -rwxr-xr-x | tools/runtime_memusage/sanitizer_logcat_analysis.sh | 308 | ||||
| -rwxr-xr-x | tools/runtime_memusage/symbol_trace_info.py | 122 |
2 files changed, 321 insertions, 109 deletions
diff --git a/tools/runtime_memusage/sanitizer_logcat_analysis.sh b/tools/runtime_memusage/sanitizer_logcat_analysis.sh index 75cb9a927e..66b48fafd0 100755 --- a/tools/runtime_memusage/sanitizer_logcat_analysis.sh +++ b/tools/runtime_memusage/sanitizer_logcat_analysis.sh @@ -18,13 +18,22 @@ # # This script takes in a logcat containing Sanitizer traces and outputs several # files, prints information regarding the traces, and plots information as well. +ALL_PIDS=false USE_TEMP=true DO_REDO=false +PACKAGE_NAME="" # EXACT_ARG and MIN_ARG are passed to prune_sanitizer_output.py EXACT_ARG="" MIN_ARG="" +OFFSET_ARGS="" +TIME_ARGS="" usage() { echo "Usage: $0 [options] [LOGCAT_FILE] [CATEGORIES...]" + echo " -a" + echo " Forces all pids associated with registered dex" + echo " files in the logcat to be processed." + echo " default: only the last pid is processed" + echo echo " -d OUT_DIRECTORY" echo " Puts all output in specified directory." echo " If not given, output will be put in a local" @@ -37,7 +46,7 @@ usage() { echo " the -m argument or by prune_sanitizer_output.py" echo echo " -f" - echo " forces redo of all commands even if output" + echo " Forces redo of all commands even if output" echo " files exist. Steps are skipped if their output" echo " exist already and this is not enabled." echo @@ -46,6 +55,22 @@ usage() { echo " at least MINIMUM_CALLS_PER_TRACE lines." echo " default: specified by prune_sanitizer_output.py" echo + echo " -o [OFFSET],[OFFSET]" + echo " Filters out all Dex File offsets outside the" + echo " range between provided offsets. 'inf' can be" + echo " provided for infinity." + echo " default: 0,inf" + echo + echo " -p [PACKAGE_NAME]" + echo " Using the package name, uses baksmali to get" + echo " a dump of the Dex File format for the package." + echo + echo " -t [TIME_OFFSET],[TIME_OFFSET]" + echo " Filters out all time offsets outside the" + echo " range between provided offsets. 'inf' can be" + echo " provided for infinity." + echo " default: 0,inf" + echo echo " CATEGORIES are words that are expected to show in" echo " a large subset of symbolized traces. Splits" echo " output based on each word." @@ -55,33 +80,61 @@ usage() { } -while [[ $# -gt 1 ]]; do -case $1 in - -d) - shift - USE_TEMP=false - OUT_DIR=$1 - shift - break - ;; - -e) - shift - EXACT_ARG='-e' - ;; - -f) - shift - DO_REDO=true - ;; - -m) - shift - MIN_ARG='-m '"$1"'' - shift - ;; - *) - usage - exit +while getopts ":ad:efm:o:p:t:" opt ; do +case ${opt} in + a) + ALL_PIDS=true + ;; + d) + USE_TEMP=false + OUT_DIR=$OPTARG + ;; + e) + EXACT_ARG='-e' + ;; + f) + DO_REDO=true + ;; + m) + if ! [ "$OPTARG" -eq "$OPTARG" ]; then + usage + exit + fi + MIN_ARG='-m '"$OPTARG" + ;; + o) + set -f + OLD_IFS=$IFS + IFS="," + OFFSET_ARGS=( $OPTARG ) + if [ "${#OFFSET_ARGS[@]}" -ne 2 ]; then + usage + exit + fi + OFFSET_ARGS=( "--offsets" "${OFFSET_ARGS[@]}" ) + IFS=$OLD_IFS + ;; + t) + set -f + OLD_IFS=$IFS + IFS="," + TIME_ARGS=( $OPTARG ) + if [ "${#TIME_ARGS[@]}" -ne 2 ]; then + usage + exit + fi + TIME_ARGS=( "--times" "${TIME_ARGS[@]}" ) + IFS=$OLD_IFS + ;; + p) + PACKAGE_NAME=$OPTARG + ;; + \?) + usage + exit esac done +shift $((OPTIND -1)) if [ $# -lt 1 ]; then usage @@ -103,78 +156,147 @@ if [ ! -d "$OUT_DIR" ]; then fi # Note: Steps are skipped if their output exists until -f flag is enabled -# Step 1 - Only output lines related to Sanitizer -# Folder that holds all file output echo "Output folder: $OUT_DIR" -ASAN_OUT=$OUT_DIR/asan_output -if [ ! -f $ASAN_OUT ] || [ $DO_REDO = true ]; then - DO_REDO=true - echo "Extracting ASAN output" - grep "app_process64" $LOGCAT_FILE > $ASAN_OUT -else - echo "Skipped: Extracting ASAN output" +unique_pids=( $(grep "RegisterDexFile" "$LOGCAT_FILE" | grep -v "zygote64" | tr -s ' ' | cut -f3 -d' ' | awk '!a[$0]++') ) +echo "List of pids: ${unique_pids[@]}" +if [ $ALL_PIDS = false ]; then + unique_pids=( ${unique_pids[-1]} ) fi -# Step 2 - Only output lines containing Dex File Start Addresses -DEX_START=$OUT_DIR/dex_start -if [ ! -f $DEX_START ] || [ $DO_REDO = true ]; then - DO_REDO=true - echo "Extracting Start of Dex File(s)" - grep "RegisterDexFile" $LOGCAT_FILE > $DEX_START -else - echo "Skipped: Extracting Start of Dex File(s)" -fi +for pid in "${unique_pids[@]}" +do + echo + echo "Current pid: $pid" + echo + PID_DIR=$OUT_DIR/$pid + if [ ! -d "$PID_DIR" ]; then + mkdir $PID_DIR + DO_REDO[$pid]=true + fi -# Step 3 - Clean Sanitizer output from Step 2 since logcat cannot -# handle large amounts of output. -ASAN_OUT_FILTERED=$OUT_DIR/asan_output_filtered -if [ ! -f $ASAN_OUT_FILTERED ] || [ $DO_REDO = true ]; then - DO_REDO=true - echo "Filtering/Cleaning ASAN output" - python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/prune_sanitizer_output.py \ - $EXACT_ARG $MIN_ARG -d $OUT_DIR $ASAN_OUT -else - echo "Skipped: Filtering/Cleaning ASAN output" -fi + INTERMEDIATES_DIR=$PID_DIR/intermediates + RESULTS_DIR=$PID_DIR/results + LOGCAT_PID_FILE=$PID_DIR/logcat -# Step 4 - Retrieve symbolized stack traces from Step 3 output -SYM_FILTERED=$OUT_DIR/sym_filtered -if [ ! -f $SYM_FILTERED ] || [ $DO_REDO = true ]; then - DO_REDO=true - echo "Retrieving symbolized traces" - $ANDROID_BUILD_TOP/development/scripts/stack $ASAN_OUT_FILTERED > $SYM_FILTERED -else - echo "Skipped: Retrieving symbolized traces" -fi + if [ ! -f "$PID_DIR/logcat" ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + awk '{if($3 == '$pid') print $0}' $LOGCAT_FILE > $LOGCAT_PID_FILE + fi -# Step 5 - Using Steps 2, 3, 4 outputs in order to output graph data -# and trace data -# Only the category names are needed for the commands giving final output -shift -TIME_OUTPUT=($OUT_DIR/time_output_*.dat) -if [ ! -e ${TIME_OUTPUT[0]} ] || [ $DO_REDO = true ]; then - DO_REDO=true - echo "Creating Categorized Time Table" - python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/symbol_trace_info.py \ - -d $OUT_DIR $ASAN_OUT_FILTERED $SYM_FILTERED $DEX_START $@ -else - echo "Skipped: Creating Categorized Time Table" -fi + if [ ! -d "$INTERMEDIATES_DIR" ]; then + mkdir $INTERMEDIATES_DIR + DO_REDO[$pid]=true + fi -# Step 6 - Use graph data from Step 5 to plot graph -# Contains the category names used for legend of gnuplot -PLOT_CATS=`echo \"Uncategorized $@\"` -echo "Plotting Categorized Time Table" -# Plots the information from logcat -gnuplot --persist -e \ - 'filename(n) = sprintf("'"$OUT_DIR"'/time_output_%d.dat", n); - catnames = '"$PLOT_CATS"'; - set title "Dex File Offset vs. Time accessed since App Start"; - set xlabel "Time (milliseconds)"; - set ylabel "Dex File Offset (bytes)"; - plot for [i=0:'"$NUM_CAT"'] filename(i) using 1:2 title word(catnames, i + 1);' + # Step 1 - Only output lines related to Sanitizer + # Folder that holds all file output + ASAN_OUT=$INTERMEDIATES_DIR/asan_output + if [ ! -f $ASAN_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + echo "Extracting ASAN output" + grep "app_process64" $LOGCAT_PID_FILE > $ASAN_OUT + else + echo "Skipped: Extracting ASAN output" + fi -if [ $USE_TEMP = true ]; then - echo "Removing temp directory and files" - rm -rf $OUT_DIR -fi + # Step 2 - Only output lines containing Dex File Start Addresses + DEX_START=$INTERMEDIATES_DIR/dex_start + if [ ! -f $DEX_START ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + echo "Extracting Start of Dex File(s)" + grep "RegisterDexFile" $LOGCAT_PID_FILE > $DEX_START + else + echo "Skipped: Extracting Start of Dex File(s)" + fi + + # Step 3 - Clean Sanitizer output from Step 2 since logcat cannot + # handle large amounts of output. + ASAN_OUT_FILTERED=$INTERMEDIATES_DIR/asan_output_filtered + if [ ! -f $ASAN_OUT_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + echo "Filtering/Cleaning ASAN output" + python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/prune_sanitizer_output.py \ + $EXACT_ARG $MIN_ARG -d $INTERMEDIATES_DIR $ASAN_OUT + else + echo "Skipped: Filtering/Cleaning ASAN output" + fi + + # Step 4 - Retrieve symbolized stack traces from Step 3 output + SYM_FILTERED=$INTERMEDIATES_DIR/sym_filtered + if [ ! -f $SYM_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + echo "Retrieving symbolized traces" + $ANDROID_BUILD_TOP/development/scripts/stack $ASAN_OUT_FILTERED > $SYM_FILTERED + else + echo "Skipped: Retrieving symbolized traces" + fi + + # Step 4.5 - Obtain Dex File Format of dex file related to package + BAKSMALI_DMP_OUT="$INTERMEDIATES_DIR""/baksmali_dex_file" + BAKSMALI_DMP_ARG="--dex-file="$BAKSMALI_DMP_OUT + if [ ! -f $BAKSMALI_DMP_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + if [ $PACKAGE_NAME != "" ]; then + # Extracting Dex File path on device from Dex File related to package + apk_directory=$(dirname $(grep $PACKAGE_NAME $DEX_START | tail -n1 | awk '{print $8}')) + apk_dex_files=$(adb shell find $apk_directory -name "*.?dex" -type f 2> /dev/null) + for apk_file in $apk_dex_files; do + base_name=$(basename $apk_file) + adb pull $apk_file $INTERMEDIATES_DIR/base."${base_name#*.}" + done + oatdump --oat-file=$INTERMEDIATES_DIR/base.odex --export-dex-to=$INTERMEDIATES_DIR --output=/dev/null + export_dex=( $INTERMEDIATES_DIR/*apk_export* ) + baksmali -JXmx1024M dump $export_dex > $BAKSMALI_DMP_OUT 2> /dev/null + if ! [ -s $BAKSMALI_DMP_OUT ]; then + rm $BAKSMALI_DMP_OUT + BAKSMALI_DMP_ARG="" + echo "Failed to retrieve Dex File format" + fi + else + BAKSMALI_DMP_ARG="" + echo "Failed to retrieve Dex File format" + fi + else + echo "Skipped: Retrieving Dex File format from baksmali" + fi + + if [ ! -d "$RESULTS_DIR" ]; then + mkdir $RESULTS_DIR + DO_REDO[$pid]=true + fi + + # Step 5 - Using Steps 2, 3, 4 outputs in order to output graph data + # and trace data + # Only the category names are needed for the commands giving final output + shift + TIME_OUTPUT=($RESULTS_DIR/time_output_*.dat) + if [ ! -e ${TIME_OUTPUT[0]} ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + DO_REDO[$pid]=true + echo "Creating Categorized Time Table" + python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/symbol_trace_info.py \ + -d $RESULTS_DIR ${OFFSET_ARGS[@]} ${TIME_ARGS[@]} $BAKSMALI_DMP_ARG $ASAN_OUT_FILTERED $SYM_FILTERED $DEX_START $@ + else + echo "Skipped: Creating Categorized Time Table" + fi + + # Step 6 - Use graph data from Step 5 to plot graph + # Contains the category names used for legend of gnuplot + PLOT_CATS=`echo \"Uncategorized $@\"` + PACKAGE_STRING="" + if [ $PACKAGE_NAME != "" ]; then + PACKAGE_STRING="Package name: "$PACKAGE_NAME" " + fi + echo "Plotting Categorized Time Table" + # Plots the information from logcat + gnuplot --persist -e \ + 'filename(n) = sprintf("'"$RESULTS_DIR"'/time_output_%d.dat", n); + catnames = '"$PLOT_CATS"'; + set title "'"$PACKAGE_STRING"'PID: '"$pid"'"; + set xlabel "Time (milliseconds)"; + set ylabel "Dex File Offset (bytes)"; + plot for [i=0:'"$NUM_CAT"'] filename(i) using 1:2 title word(catnames, i + 1);' + + if [ $USE_TEMP = true ]; then + echo "Removing temp directory and files" + rm -rf $OUT_DIR + fi +done diff --git a/tools/runtime_memusage/symbol_trace_info.py b/tools/runtime_memusage/symbol_trace_info.py index e539be2217..a5ced380f0 100755 --- a/tools/runtime_memusage/symbol_trace_info.py +++ b/tools/runtime_memusage/symbol_trace_info.py @@ -25,7 +25,7 @@ from datetime import datetime import argparse import bisect import os -import sys +import re def find_match(list_substrings, big_string): @@ -36,8 +36,13 @@ def find_match(list_substrings, big_string): return list_substrings.index("Uncategorized") -def absolute_to_relative(plot_list, dex_start_list, cat_list): +def absolute_to_relative(data_lists, symbol_traces): """Address changed to Dex File offset and shifting time to 0 min in ms.""" + plot_list = data_lists["plot_list"] + dex_start_list = data_lists["dex_start_list"] + cat_list = data_lists["cat_list"] + offsets = data_lists["offsets"] + time_offsets = data_lists["time_offsets"] time_format_str = "%H:%M:%S.%f" first_access_time = datetime.strptime(plot_list[0][0], time_format_str) @@ -52,9 +57,22 @@ def absolute_to_relative(plot_list, dex_start_list, cat_list): dex_file_start = dex_start_list[bisect.bisect(dex_start_list, address_access) - 1 ] - elem.insert(1, address_access - dex_file_start) - # Category that a data point belongs to - elem.insert(2, cat_list[ind]) + dex_offset = address_access - dex_file_start + # Meant to nullify data that does not meet offset criteria if specified + # Assumes that offsets is already sorted + if (dex_offset >= offsets[0] and dex_offset < offsets[1] and + elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]): + + elem.insert(1, dex_offset) + # Category that a data point belongs to + elem.insert(2, cat_list[ind]) + else: + elem[0] = None + elem[1] = None + elem.append(None) + elem.append(None) + symbol_traces[ind] = None + cat_list[ind] = None def print_category_info(cat_split, outname, out_dir_name, title): @@ -67,7 +85,7 @@ def print_category_info(cat_split, outname, out_dir_name, title): str(len(trace_counts_list_ordered))) print("\tSum of trace counts: " + str(sum([trace[1] for trace in trace_counts_list_ordered]))) - print("\n\tCount: How many traces appeared with count\n\t") + print("\n\tCount: How many traces appeared with count\n\t", end="") print(Counter([trace[1] for trace in trace_counts_list_ordered])) with open(os.path.join(out_dir_name, outname), "w") as output_file: for trace in trace_counts_list_ordered: @@ -79,6 +97,8 @@ def print_category_info(cat_split, outname, out_dir_name, title): def print_categories(categories, symbol_file_split, out_dir_name): """Prints details of all categories.""" + symbol_file_split = [trace for trace in symbol_file_split + if trace is not None] # Info of traces containing a call to current category for cat_num, cat_name in enumerate(categories[1:]): print("\nCategory #%d" % (cat_num + 1)) @@ -123,6 +143,26 @@ def parse_args(argv): parser.add_argument("-d", action="store", default="", dest="out_dir_name", type=is_directory, help="Output Directory") + parser.add_argument("--dex-file", action="store", + default=None, dest="dex_file", + type=argparse.FileType("r"), + help="Baksmali Dex File Dump") + parser.add_argument("--offsets", action="store", nargs=2, + default=[float(0), float("inf")], + dest="offsets", + metavar="OFFSET", + type=float, + help="Filters out accesses not between provided" + " offsets if provided. Can provide 'inf'" + " for infinity") + parser.add_argument("--times", action="store", nargs=2, + default=[float(0), float("inf")], + dest="times", + metavar="TIME", + type=float, + help="Filters out accesses not between provided" + " time offsets if provided. Can provide 'inf'" + " for infinity") parser.add_argument("sanitizer_trace", action="store", type=argparse.FileType("r"), help="File containing sanitizer traces filtered by " @@ -141,6 +181,14 @@ def parse_args(argv): return parser.parse_args(argv) +def get_dex_offset_data(line, dex_file_item): + """ Returns a tuple of dex file offset, item name, and data of a line.""" + return (int(line[:line.find(":")], 16), + (dex_file_item, + line.split("|")[1].strip()) + ) + + def read_data(parsed_argv): """Reads data from filepath arguments and parses them into lists.""" # Using a dictionary to establish relation between lists added @@ -149,6 +197,12 @@ def read_data(parsed_argv): # Makes sure each trace maps to some category categories.insert(0, "Uncategorized") + data_lists["offsets"] = parsed_argv.offsets + data_lists["offsets"].sort() + + data_lists["times"] = parsed_argv.times + data_lists["times"].sort() + logcat_file_data = parsed_argv.sanitizer_trace.readlines() parsed_argv.sanitizer_trace.close() @@ -159,6 +213,25 @@ def read_data(parsed_argv): dex_start_file_data = parsed_argv.dex_starts.readlines() parsed_argv.dex_starts.close() + if parsed_argv.dex_file != None: + dex_file_data = parsed_argv.dex_file.read() + parsed_argv.dex_file.close() + # Splits baksmali dump by each item + item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ", + dex_file_data)] + # Splits each item by line and creates a list of offsets and a + # corresponding list of the data associated with that line + offset_list, offset_data = zip(*[get_dex_offset_data(line, item[0]) + for item in item_split + for line in item[1:] + if re.search("[0-9a-f]{6}:", line) + is not None + and line.find("|") != -1]) + data_lists["offset_list"] = offset_list + data_lists["offset_data"] = offset_data + else: + dex_file_data = None + # Each element is a tuple of time and address accessed data_lists["plot_list"] = [[elem[1] for elem in enumerate(line.split()) if elem[0] in (1, 11) @@ -184,23 +257,26 @@ def read_data(parsed_argv): return data_lists, categories, symbol_file_split -def main(argv=None): +def main(): """Takes in trace information and outputs details about them.""" - if argv is None: - argv = sys.argv - parsed_argv = parse_args(argv[1:]) + parsed_argv = parse_args(None) data_lists, categories, symbol_file_split = read_data(parsed_argv) + # Formats plot_list such that each element is a data point - absolute_to_relative(data_lists["plot_list"], data_lists["dex_start_list"], - data_lists["cat_list"]) + #absolute_to_relative(data_lists["plot_list"], data_lists["dex_start_list"], + # data_lists["cat_list"], data_lists["offsets"], + # data_lists["times"], symbol_file_split) + absolute_to_relative(data_lists, symbol_file_split) for file_ext, cat_name in enumerate(categories): out_file_name = os.path.join(parsed_argv.out_dir_name, "time_output_" + str(file_ext) + ".dat") with open(out_file_name, "w") as output_file: output_file.write("# Category: " + cat_name + "\n") - output_file.write("# Time, Dex File Offset, Address \n") + output_file.write("# Time, Dex File Offset_10, Dex File Offset_16," + " Address, Item Accessed, Item Member Accessed" + " Unaligned\n") for time, dex_offset, category, address in data_lists["plot_list"]: if category == cat_name: output_file.write( @@ -208,9 +284,23 @@ def main(argv=None): " " + str(dex_offset) + " #" + - str(address) + - "\n") - + hex(dex_offset) + + " " + + str(address)) + if data_lists.has_key("offset_list"): + dex_offset_index = bisect.bisect( + data_lists["offset_list"], + dex_offset) - 1 + aligned_dex_offset = (data_lists["offset_list"] + [dex_offset_index]) + dex_offset_data = (data_lists["offset_data"] + [dex_offset_index]) + output_file.write( + " " + + "|".join(dex_offset_data) + + " " + + str(aligned_dex_offset != dex_offset)) + output_file.write("\n") print_categories(categories, symbol_file_split, parsed_argv.out_dir_name) |