diff options
| -rw-r--r-- | tools/runtime_memusage/README | 32 | ||||
| -rwxr-xr-x | tools/runtime_memusage/prune_sanitizer_output.py | 15 | ||||
| -rwxr-xr-x | tools/runtime_memusage/sanitizer_logcat_analysis.sh | 226 | ||||
| -rwxr-xr-x | tools/runtime_memusage/symbol_trace_info.py | 84 |
4 files changed, 230 insertions, 127 deletions
diff --git a/tools/runtime_memusage/README b/tools/runtime_memusage/README index 2543df1a35..2af1de5dbe 100644 --- a/tools/runtime_memusage/README +++ b/tools/runtime_memusage/README @@ -40,6 +40,17 @@ unnecessary trace information. =========================================================================== Usage: sanitizer_logcat_analysis.sh [options] [LOGCAT_FILE] [CATEGORIES...] + -a + Forces all pids associated with registered dex + files in the logcat to be processed. + default: only the last pid is processed + + -b [DEX_FILE_NUMBER] + Outputs data for the specified baksmali + dump if -p is provided. + default: first baksmali dump in order of dex + file registration + -d OUT_DIRECTORY Puts all output in specified directory. If not given, output will be put in a local @@ -52,14 +63,31 @@ Usage: sanitizer_logcat_analysis.sh [options] [LOGCAT_FILE] [CATEGORIES...] the -m argument or by prune_sanitizer_output.py -f - forces redo of all commands even if output - files exist. + Forces redo of all commands even if output + files exist. Steps are skipped if their output + exist already and this is not enabled. -m [MINIMUM_CALLS_PER_TRACE] Filters out all traces that do not have at least MINIMUM_CALLS_PER_TRACE lines. default: specified by prune_sanitizer_output.py + -o [OFFSET],[OFFSET] + Filters out all Dex File offsets outside the + range between provided offsets. 'inf' can be + provided for infinity. + default: 0,inf + + -p [PACKAGE_NAME] + Using the package name, uses baksmali to get + a dump of the Dex File format for the package. + + -t [TIME_OFFSET],[TIME_OFFSET] + Filters out all time offsets outside the + range between provided offsets. 'inf' can be + provided for infinity. + default: 0,inf + CATEGORIES are words that are expected to show in a large subset of symbolized traces. Splits output based on each word. diff --git a/tools/runtime_memusage/prune_sanitizer_output.py b/tools/runtime_memusage/prune_sanitizer_output.py index d95b2ced1c..3cc51cfa51 100755 --- a/tools/runtime_memusage/prune_sanitizer_output.py +++ b/tools/runtime_memusage/prune_sanitizer_output.py @@ -33,7 +33,7 @@ def match_to_int(match): """ # Hard coded string are necessary since each trace must have the address # accessed, which is printed before trace lines. - if match == "use-after-poison": + if match == "use-after-poison" or match == "unknown-crash": return -2 elif match == "READ": return -1 @@ -43,6 +43,9 @@ def match_to_int(match): def clean_trace_if_valid(trace, stack_min_size, prune_exact): """Cleans trace if it meets a certain standard. Returns None otherwise.""" + # Note: Sample input may contain "unknown-crash" instead of + # "use-after-poison" + # # Sample input: # trace: # "...ERROR: AddressSanitizer: use-after-poison on address 0x0071126a870a... @@ -68,6 +71,7 @@ def clean_trace_if_valid(trace, stack_min_size, prune_exact): trace_line_matches = [(match_to_int(match.group()), match.start()) for match in re.finditer("#[0-9]+ " "|use-after-poison" + "|unknown-crash" "|READ", trace) ] # Finds the first index where the line number ordering isn't in sequence or @@ -135,16 +139,17 @@ def main(): ] trace_clean_split = [trace for trace in trace_clean_split if trace is not None] - - outfile = os.path.join(out_dir_name, trace_file.name + "_filtered") + filename = os.path.basename(trace_file.name + "_filtered") + outfile = os.path.join(out_dir_name, filename) with open(outfile, "w") as output_file: output_file.write(STACK_DIVIDER.join(trace_clean_split)) filter_percent = 100.0 - (float(len(trace_clean_split)) / len(trace_split) * 100) filter_amount = len(trace_split) - len(trace_clean_split) - print("Filtered out %d (%f%%) of %d." - % (filter_amount, filter_percent, len(trace_split))) + print("Filtered out %d (%f%%) of %d. %d (%f%%) remain." + % (filter_amount, filter_percent, len(trace_split), + len(trace_split) - filter_amount, 1 - filter_percent)) if __name__ == "__main__": diff --git a/tools/runtime_memusage/sanitizer_logcat_analysis.sh b/tools/runtime_memusage/sanitizer_logcat_analysis.sh index 66b48fafd0..e1a8161f0f 100755 --- a/tools/runtime_memusage/sanitizer_logcat_analysis.sh +++ b/tools/runtime_memusage/sanitizer_logcat_analysis.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # Copyright (C) 2017 The Android Open Source Project # @@ -22,11 +22,12 @@ ALL_PIDS=false USE_TEMP=true DO_REDO=false PACKAGE_NAME="" +BAKSMALI_NUM=0 # EXACT_ARG and MIN_ARG are passed to prune_sanitizer_output.py EXACT_ARG="" -MIN_ARG="" -OFFSET_ARGS="" -TIME_ARGS="" +MIN_ARG=() +OFFSET_ARGS=() +TIME_ARGS=() usage() { echo "Usage: $0 [options] [LOGCAT_FILE] [CATEGORIES...]" echo " -a" @@ -34,6 +35,12 @@ usage() { echo " files in the logcat to be processed." echo " default: only the last pid is processed" echo + echo " -b [DEX_FILE_NUMBER]" + echo " Outputs data for the specified baksmali" + echo " dump if -p is provided." + echo " default: first baksmali dump in order of dex" + echo " file registration" + echo echo " -d OUT_DIRECTORY" echo " Puts all output in specified directory." echo " If not given, output will be put in a local" @@ -80,11 +87,18 @@ usage() { } -while getopts ":ad:efm:o:p:t:" opt ; do +while getopts ":ab:d:efm:o:p:t:" opt ; do case ${opt} in a) ALL_PIDS=true ;; + b) + if ! [[ "$OPTARG" -eq "$OPTARG" ]]; then + usage + exit + fi + BAKSMALI_NUM=$OPTARG + ;; d) USE_TEMP=false OUT_DIR=$OPTARG @@ -96,35 +110,37 @@ case ${opt} in DO_REDO=true ;; m) - if ! [ "$OPTARG" -eq "$OPTARG" ]; then + if ! [[ "$OPTARG" -eq "$OPTARG" ]]; then usage exit fi - MIN_ARG='-m '"$OPTARG" + MIN_ARG=( "-m" "$OPTARG" ) ;; o) set -f - OLD_IFS=$IFS + old_ifs=$IFS IFS="," OFFSET_ARGS=( $OPTARG ) - if [ "${#OFFSET_ARGS[@]}" -ne 2 ]; then + if [[ "${#OFFSET_ARGS[@]}" -ne 2 ]]; then usage exit fi OFFSET_ARGS=( "--offsets" "${OFFSET_ARGS[@]}" ) - IFS=$OLD_IFS + IFS=$old_ifs + set +f ;; t) set -f - OLD_IFS=$IFS + old_ifs=$IFS IFS="," TIME_ARGS=( $OPTARG ) - if [ "${#TIME_ARGS[@]}" -ne 2 ]; then + if [[ "${#TIME_ARGS[@]}" -ne 2 ]]; then usage exit fi TIME_ARGS=( "--times" "${TIME_ARGS[@]}" ) - IFS=$OLD_IFS + IFS=$old_ifs + set +f ;; p) PACKAGE_NAME=$OPTARG @@ -136,7 +152,7 @@ esac done shift $((OPTIND -1)) -if [ $# -lt 1 ]; then +if [[ $# -lt 1 ]]; then usage exit fi @@ -145,21 +161,24 @@ LOGCAT_FILE=$1 NUM_CAT=$(($# - 1)) # Use a temp directory that will be deleted -if [ $USE_TEMP = true ]; then - OUT_DIR=$(mktemp -d --tmpdir=$PWD) +if [[ $USE_TEMP = true ]]; then + OUT_DIR=$(mktemp -d --tmpdir="$PWD") DO_REDO=true fi -if [ ! -d "$OUT_DIR" ]; then - mkdir $OUT_DIR +if [[ ! -d "$OUT_DIR" ]]; then + mkdir "$OUT_DIR" DO_REDO=true fi # Note: Steps are skipped if their output exists until -f flag is enabled echo "Output folder: $OUT_DIR" -unique_pids=( $(grep "RegisterDexFile" "$LOGCAT_FILE" | grep -v "zygote64" | tr -s ' ' | cut -f3 -d' ' | awk '!a[$0]++') ) +# Finds the lines matching pattern criteria and prints out unique instances of +# the 3rd word (PID) +unique_pids=( $(awk '/RegisterDexFile:/ && !/zygote/ {if(!a[$3]++) print $3}' \ + "$LOGCAT_FILE") ) echo "List of pids: ${unique_pids[@]}" -if [ $ALL_PIDS = false ]; then +if [[ $ALL_PIDS = false ]]; then unique_pids=( ${unique_pids[-1]} ) fi @@ -168,99 +187,142 @@ do echo echo "Current pid: $pid" echo - PID_DIR=$OUT_DIR/$pid - if [ ! -d "$PID_DIR" ]; then - mkdir $PID_DIR + pid_dir=$OUT_DIR/$pid + if [[ ! -d "$pid_dir" ]]; then + mkdir "$pid_dir" DO_REDO[$pid]=true fi - INTERMEDIATES_DIR=$PID_DIR/intermediates - RESULTS_DIR=$PID_DIR/results - LOGCAT_PID_FILE=$PID_DIR/logcat + intermediates_dir=$pid_dir/intermediates + results_dir=$pid_dir/results + logcat_pid_file=$pid_dir/logcat - if [ ! -f "$PID_DIR/logcat" ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + if [[ ! -f "$logcat_pid_file" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true - awk '{if($3 == '$pid') print $0}' $LOGCAT_FILE > $LOGCAT_PID_FILE + awk "{if(\$3 == $pid) print \$0}" "$LOGCAT_FILE" > "$logcat_pid_file" fi - if [ ! -d "$INTERMEDIATES_DIR" ]; then - mkdir $INTERMEDIATES_DIR + if [[ ! -d "$intermediates_dir" ]]; then + mkdir "$intermediates_dir" DO_REDO[$pid]=true fi # Step 1 - Only output lines related to Sanitizer # Folder that holds all file output - ASAN_OUT=$INTERMEDIATES_DIR/asan_output - if [ ! -f $ASAN_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + asan_out=$intermediates_dir/asan_output + if [[ ! -f "$asan_out" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true echo "Extracting ASAN output" - grep "app_process64" $LOGCAT_PID_FILE > $ASAN_OUT + grep "app_process64" "$logcat_pid_file" > "$asan_out" else echo "Skipped: Extracting ASAN output" fi # Step 2 - Only output lines containing Dex File Start Addresses - DEX_START=$INTERMEDIATES_DIR/dex_start - if [ ! -f $DEX_START ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + dex_start=$intermediates_dir/dex_start + if [[ ! -f "$dex_start" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true echo "Extracting Start of Dex File(s)" - grep "RegisterDexFile" $LOGCAT_PID_FILE > $DEX_START + if [[ ! -z "$PACKAGE_NAME" ]]; then + awk '/RegisterDexFile:/ && /'"$PACKAGE_NAME"'/ && /\/data\/app/' \ + "$logcat_pid_file" > "$dex_start" + else + grep "RegisterDexFile:" "$logcat_pid_file" > "$dex_start" + fi else echo "Skipped: Extracting Start of Dex File(s)" fi # Step 3 - Clean Sanitizer output from Step 2 since logcat cannot # handle large amounts of output. - ASAN_OUT_FILTERED=$INTERMEDIATES_DIR/asan_output_filtered - if [ ! -f $ASAN_OUT_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + asan_out_filtered=$intermediates_dir/asan_output_filtered + if [[ ! -f "$asan_out_filtered" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true echo "Filtering/Cleaning ASAN output" - python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/prune_sanitizer_output.py \ - $EXACT_ARG $MIN_ARG -d $INTERMEDIATES_DIR $ASAN_OUT + python "$ANDROID_BUILD_TOP"/art/tools/runtime_memusage/prune_sanitizer_output.py \ + "$EXACT_ARG" "${MIN_ARG[@]}" -d "$intermediates_dir" "$asan_out" else echo "Skipped: Filtering/Cleaning ASAN output" fi # Step 4 - Retrieve symbolized stack traces from Step 3 output - SYM_FILTERED=$INTERMEDIATES_DIR/sym_filtered - if [ ! -f $SYM_FILTERED ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + sym_filtered=$intermediates_dir/sym_filtered + if [[ ! -f "$sym_filtered" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true echo "Retrieving symbolized traces" - $ANDROID_BUILD_TOP/development/scripts/stack $ASAN_OUT_FILTERED > $SYM_FILTERED + "$ANDROID_BUILD_TOP"/development/scripts/stack "$asan_out_filtered" \ + > "$sym_filtered" else echo "Skipped: Retrieving symbolized traces" fi # Step 4.5 - Obtain Dex File Format of dex file related to package - BAKSMALI_DMP_OUT="$INTERMEDIATES_DIR""/baksmali_dex_file" - BAKSMALI_DMP_ARG="--dex-file="$BAKSMALI_DMP_OUT - if [ ! -f $BAKSMALI_DMP_OUT ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then - if [ $PACKAGE_NAME != "" ]; then + filtered_dex_start=$intermediates_dir/filtered_dex_start + baksmali_dmp_ctr=0 + baksmali_dmp_prefix=$intermediates_dir"/baksmali_dex_file_" + baksmali_dmp_files=( $baksmali_dmp_prefix* ) + baksmali_dmp_arg="--dex-file "${baksmali_dmp_files[$BAKSMALI_NUM]} + apk_dex_files=( ) + if [[ ! -f "$baksmali_dmp_prefix""$BAKSMALI_NUM" ]] || \ + [[ ! -f "$filtered_dex_start" ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then + if [[ ! -z "$PACKAGE_NAME" ]]; then + DO_REDO[$pid]=true # Extracting Dex File path on device from Dex File related to package - apk_directory=$(dirname $(grep $PACKAGE_NAME $DEX_START | tail -n1 | awk '{print $8}')) - apk_dex_files=$(adb shell find $apk_directory -name "*.?dex" -type f 2> /dev/null) - for apk_file in $apk_dex_files; do - base_name=$(basename $apk_file) - adb pull $apk_file $INTERMEDIATES_DIR/base."${base_name#*.}" + apk_directory=$(dirname "$(tail -n1 "$dex_start" | awk "{print \$8}")") + for dex_file in $(awk "{print \$8}" "$dex_start"); do + apk_dex_files+=( $(basename "$dex_file") ) + done + apk_oat_files=$(adb shell find "$apk_directory" -name "*.?dex" -type f \ + 2> /dev/null) + # Pulls the .odex and .vdex files associated with the package + for apk_file in $apk_oat_files; do + base_name=$(basename "$apk_file") + adb pull "$apk_file" "$intermediates_dir/base.${base_name#*.}" done - oatdump --oat-file=$INTERMEDIATES_DIR/base.odex --export-dex-to=$INTERMEDIATES_DIR --output=/dev/null - export_dex=( $INTERMEDIATES_DIR/*apk_export* ) - baksmali -JXmx1024M dump $export_dex > $BAKSMALI_DMP_OUT 2> /dev/null - if ! [ -s $BAKSMALI_DMP_OUT ]; then - rm $BAKSMALI_DMP_OUT - BAKSMALI_DMP_ARG="" - echo "Failed to retrieve Dex File format" - fi + oatdump --oat-file="$intermediates_dir"/base.odex \ + --export-dex-to="$intermediates_dir" --output=/dev/null + for dex_file in "${apk_dex_files[@]}"; do + exported_dex_file=$intermediates_dir/$dex_file"_export.dex" + baksmali_dmp_out="$baksmali_dmp_prefix""$((baksmali_dmp_ctr++))" + baksmali -JXmx1024M dump "$exported_dex_file" \ + > "$baksmali_dmp_out" 2> "$intermediates_dir"/error + if ! [[ -s "$baksmali_dmp_out" ]]; then + rm "$baksmali_dmp_prefix"* + baksmali_dmp_arg="" + echo "Failed to retrieve Dex File format" + break + fi + done + baksmali_dmp_files=( "$baksmali_dmp_prefix"* ) + baksmali_dmp_arg="--dex-file "${baksmali_dmp_files[$BAKSMALI_NUM]} + # Gets the baksmali dump associated with BAKSMALI_NUM + awk "NR == $((BAKSMALI_NUM + 1))" "$dex_start" > "$filtered_dex_start" + results_dir=$results_dir"_"$BAKSMALI_NUM + echo "Skipped: Retrieving Dex File format from baksmali; no package given" else - BAKSMALI_DMP_ARG="" - echo "Failed to retrieve Dex File format" + cp "$dex_start" "$filtered_dex_start" + baksmali_dmp_arg="" fi else + awk "NR == $((BAKSMALI_NUM + 1))" "$dex_start" > "$filtered_dex_start" + results_dir=$results_dir"_"$BAKSMALI_NUM echo "Skipped: Retrieving Dex File format from baksmali" fi - if [ ! -d "$RESULTS_DIR" ]; then - mkdir $RESULTS_DIR + if [[ ! -d "$results_dir" ]]; then + mkdir "$results_dir" DO_REDO[$pid]=true fi @@ -268,35 +330,45 @@ do # and trace data # Only the category names are needed for the commands giving final output shift - TIME_OUTPUT=($RESULTS_DIR/time_output_*.dat) - if [ ! -e ${TIME_OUTPUT[0]} ] || [ "${DO_REDO[$pid]}" = true ] || [ $DO_REDO = true ]; then + time_output=($results_dir/time_output_*.dat) + if [[ ! -e ${time_output[0]} ]] || \ + [[ "${DO_REDO[$pid]}" = true ]] || \ + [[ $DO_REDO = true ]]; then DO_REDO[$pid]=true echo "Creating Categorized Time Table" - python $ANDROID_BUILD_TOP/art/tools/runtime_memusage/symbol_trace_info.py \ - -d $RESULTS_DIR ${OFFSET_ARGS[@]} ${TIME_ARGS[@]} $BAKSMALI_DMP_ARG $ASAN_OUT_FILTERED $SYM_FILTERED $DEX_START $@ + baksmali_dmp_args=( $baksmali_dmp_arg ) + python "$ANDROID_BUILD_TOP"/art/tools/runtime_memusage/symbol_trace_info.py \ + -d "$results_dir" "${OFFSET_ARGS[@]}" "${baksmali_dmp_args[@]}" \ + "${TIME_ARGS[@]}" "$asan_out_filtered" "$sym_filtered" \ + "$filtered_dex_start" "$@" else echo "Skipped: Creating Categorized Time Table" fi # Step 6 - Use graph data from Step 5 to plot graph # Contains the category names used for legend of gnuplot - PLOT_CATS=`echo \"Uncategorized $@\"` - PACKAGE_STRING="" - if [ $PACKAGE_NAME != "" ]; then - PACKAGE_STRING="Package name: "$PACKAGE_NAME" " + plot_cats="\"Uncategorized $*\"" + package_string="" + dex_name="" + if [[ ! -z "$PACKAGE_NAME" ]]; then + package_string="Package name: $PACKAGE_NAME " + fi + if [[ ! -z "$baksmali_dmp_arg" ]]; then + dex_file_path="$(awk "{print \$8}" "$filtered_dex_start" | tail -n1)" + dex_name="Dex File name: $(basename "$dex_file_path") " fi echo "Plotting Categorized Time Table" # Plots the information from logcat gnuplot --persist -e \ - 'filename(n) = sprintf("'"$RESULTS_DIR"'/time_output_%d.dat", n); - catnames = '"$PLOT_CATS"'; - set title "'"$PACKAGE_STRING"'PID: '"$pid"'"; + 'filename(n) = sprintf("'"$results_dir"'/time_output_%d.dat", n); + catnames = '"$plot_cats"'; + set title "'"$package_string""$dex_name"'PID: '"$pid"'"; set xlabel "Time (milliseconds)"; set ylabel "Dex File Offset (bytes)"; plot for [i=0:'"$NUM_CAT"'] filename(i) using 1:2 title word(catnames, i + 1);' - if [ $USE_TEMP = true ]; then + if [[ $USE_TEMP = true ]]; then echo "Removing temp directory and files" - rm -rf $OUT_DIR + rm -rf "$OUT_DIR" fi done diff --git a/tools/runtime_memusage/symbol_trace_info.py b/tools/runtime_memusage/symbol_trace_info.py index a5ced380f0..22f8ee9405 100755 --- a/tools/runtime_memusage/symbol_trace_info.py +++ b/tools/runtime_memusage/symbol_trace_info.py @@ -38,15 +38,15 @@ def find_match(list_substrings, big_string): def absolute_to_relative(data_lists, symbol_traces): """Address changed to Dex File offset and shifting time to 0 min in ms.""" - plot_list = data_lists["plot_list"] - dex_start_list = data_lists["dex_start_list"] - cat_list = data_lists["cat_list"] + offsets = data_lists["offsets"] - time_offsets = data_lists["time_offsets"] + time_offsets = data_lists["times"] + + # Format of time provided by logcat time_format_str = "%H:%M:%S.%f" - first_access_time = datetime.strptime(plot_list[0][0], + first_access_time = datetime.strptime(data_lists["plot_list"][0][0], time_format_str) - for ind, elem in enumerate(plot_list): + for ind, elem in enumerate(data_lists["plot_list"]): elem_date_time = datetime.strptime(elem[0], time_format_str) # Shift time values so that first access is at time 0 milliseconds elem[0] = int((elem_date_time - first_access_time).total_seconds() * @@ -54,25 +54,23 @@ def absolute_to_relative(data_lists, symbol_traces): address_access = int(elem[1], 16) # For each poisoned address, find highest Dex File starting address less # than address_access - dex_file_start = dex_start_list[bisect.bisect(dex_start_list, - address_access) - 1 - ] - dex_offset = address_access - dex_file_start + dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"]) + dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1 + dex_offset = address_access - dex_start_list[dex_file_ind] + # Assumes that offsets is already sorted and constrains offset to be + # within range of the dex_file + max_offset = min(offsets[1], dex_size_list[dex_file_ind]) # Meant to nullify data that does not meet offset criteria if specified - # Assumes that offsets is already sorted - if (dex_offset >= offsets[0] and dex_offset < offsets[1] and - elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]): + if (dex_offset >= offsets[0] and dex_offset < max_offset and + elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]): elem.insert(1, dex_offset) # Category that a data point belongs to - elem.insert(2, cat_list[ind]) + elem.insert(2, data_lists["cat_list"][ind]) else: - elem[0] = None - elem[1] = None - elem.append(None) - elem.append(None) + elem[:] = 4 * [None] symbol_traces[ind] = None - cat_list[ind] = None + data_lists["cat_list"][ind] = None def print_category_info(cat_split, outname, out_dir_name, title): @@ -98,7 +96,7 @@ def print_category_info(cat_split, outname, out_dir_name, title): def print_categories(categories, symbol_file_split, out_dir_name): """Prints details of all categories.""" symbol_file_split = [trace for trace in symbol_file_split - if trace is not None] + if trace is not None] # Info of traces containing a call to current category for cat_num, cat_name in enumerate(categories[1:]): print("\nCategory #%d" % (cat_num + 1)) @@ -184,8 +182,8 @@ def parse_args(argv): def get_dex_offset_data(line, dex_file_item): """ Returns a tuple of dex file offset, item name, and data of a line.""" return (int(line[:line.find(":")], 16), - (dex_file_item, - line.split("|")[1].strip()) + (dex_file_item, + line.split("|")[1].strip()) ) @@ -206,27 +204,28 @@ def read_data(parsed_argv): logcat_file_data = parsed_argv.sanitizer_trace.readlines() parsed_argv.sanitizer_trace.close() - symbol_file_split = parsed_argv.symbol_trace.read().split("Stack Trace")[ - 1:] + symbol_file_split = parsed_argv.symbol_trace.read().split("Stack Trace") + # Removes text before first trace + symbol_file_split = symbol_file_split[1:] parsed_argv.symbol_trace.close() dex_start_file_data = parsed_argv.dex_starts.readlines() parsed_argv.dex_starts.close() - if parsed_argv.dex_file != None: + if parsed_argv.dex_file is not None: dex_file_data = parsed_argv.dex_file.read() parsed_argv.dex_file.close() # Splits baksmali dump by each item item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ", - dex_file_data)] + dex_file_data)] # Splits each item by line and creates a list of offsets and a # corresponding list of the data associated with that line offset_list, offset_data = zip(*[get_dex_offset_data(line, item[0]) - for item in item_split + for item in item_split for line in item[1:] - if re.search("[0-9a-f]{6}:", line) - is not None - and line.find("|") != -1]) + if re.search("[0-9a-f]{6}:", line) + is not None and + line.find("|") != -1]) data_lists["offset_list"] = offset_list data_lists["offset_data"] = offset_data else: @@ -237,7 +236,8 @@ def read_data(parsed_argv): if elem[0] in (1, 11) ] for line in logcat_file_data - if "use-after-poison" in line + if "use-after-poison" in line or + "unknown-crash" in line ] # Contains a mapping between traces and the category they belong to # based on arguments @@ -246,27 +246,25 @@ def read_data(parsed_argv): # Contains a list of starting address of all dex files to calculate dex # offsets - data_lists["dex_start_list"] = [int(line.split("@")[1], 16) - for line in dex_start_file_data - if "RegisterDexFile" in line - ] + data_lists["dex_ends_list"] = [(int(line.split()[9], 16), + int(line.split()[12]) + ) + for line in dex_start_file_data + if "RegisterDexFile" in line + ] # Dex File Starting addresses must be sorted because bisect requires sorted # lists. - data_lists["dex_start_list"].sort() + data_lists["dex_ends_list"].sort() return data_lists, categories, symbol_file_split def main(): """Takes in trace information and outputs details about them.""" - parsed_argv = parse_args(None) data_lists, categories, symbol_file_split = read_data(parsed_argv) # Formats plot_list such that each element is a data point - #absolute_to_relative(data_lists["plot_list"], data_lists["dex_start_list"], - # data_lists["cat_list"], data_lists["offsets"], - # data_lists["times"], symbol_file_split) absolute_to_relative(data_lists, symbol_file_split) for file_ext, cat_name in enumerate(categories): out_file_name = os.path.join(parsed_argv.out_dir_name, "time_output_" + @@ -287,10 +285,10 @@ def main(): hex(dex_offset) + " " + str(address)) - if data_lists.has_key("offset_list"): + if "offset_list" in data_lists: dex_offset_index = bisect.bisect( - data_lists["offset_list"], - dex_offset) - 1 + data_lists["offset_list"], + dex_offset) - 1 aligned_dex_offset = (data_lists["offset_list"] [dex_offset_index]) dex_offset_data = (data_lists["offset_data"] |