diff options
author | 2023-09-26 14:16:57 +0000 | |
---|---|---|
committer | 2023-09-26 16:23:41 +0000 | |
commit | e6067622fca7f243e5e84aaf183c48ff5ef19a55 (patch) | |
tree | a5f43916d698dca8f82ea56c09c2944599537f33 | |
parent | b320a4d6edb6ab0824934700d706cb5e7cf19c56 (diff) |
Update imgdiag scripts
run_imgdiag:
* Add PID to output filenames.
* Add option to specify host output directory.
create_dirty_image_objects:
* Add an option to merge imgdiag files with the same process names.
* Improve object sorting.
* Print stats in csv format.
Test: ./run_imgdiag.py --host-out-dir out
Test: ./create_dirty_image_objects.py ./out/imgdiag_* --merge-same-procnames --print-stats
Change-Id: I94aba7b241e0a0c2ebfb33ffcc333640c04f2d01
-rwxr-xr-x | imgdiag/create_dirty_image_objects.py | 149 | ||||
-rwxr-xr-x | imgdiag/run_imgdiag.py | 31 |
2 files changed, 129 insertions, 51 deletions
diff --git a/imgdiag/create_dirty_image_objects.py b/imgdiag/create_dirty_image_objects.py index d090fcc2d1..da0ed2c918 100755 --- a/imgdiag/create_dirty_image_objects.py +++ b/imgdiag/create_dirty_image_objects.py @@ -16,37 +16,90 @@ import argparse from collections import defaultdict +from enum import Enum import os +import re -def process_dirty_entries(entries, with_sort): - mark_counts = defaultdict(int) +class SortType(Enum): + NONE = 'none' + SIMPLE = 'simple' + OPT_NEIGHBOURS = 'opt_neighbours' + + +def merge_same_procnames(entries): + path_regex = r'(.+)_(\d+).txt' + prog = re.compile(path_regex) + + merged_entries = defaultdict(set) + for path, objs in entries: + basename = os.path.basename(path) + m = prog.match(basename) + if m: + merged_entries[m.group(1)].update(objs) + + return sorted(merged_entries.items(), key=lambda x: len(x[1])) + + +def opt_neighbours(sort_keys): + sort_keys = dict(sort_keys) + res = list() + + # Start with a bin with the lowest process and objects count. + cur_key = min( + sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1])) + )[0] + res.append((cur_key, sort_keys[cur_key])) + del sort_keys[cur_key] + + # Find next most similar sort key and update the result. + while sort_keys: + + def jaccard_index(x): + return (x & cur_key).bit_count() / (x | cur_key).bit_count() + + next_key = max(sort_keys.keys(), key=jaccard_index) + res.append((next_key, sort_keys[next_key])) + del sort_keys[next_key] + cur_key = next_key + return res + + +def process_dirty_entries(entries, sort_type): dirty_image_objects = [] union = set() - for v in entries.values(): + for k, v in entries: union = union.union(v) + if sort_type == SortType.NONE: + dirty_obj_lines = [obj + '\n' for obj in sorted(union)] + return (dirty_obj_lines, dict()) + + # sort_key -> [objs] + sort_keys = defaultdict(list) for obj in union: - str_marker = '' - marker = 0 - # Sort marker is uint32_t, where Nth bit is set if Nth process has this object dirty. - for idx, v in enumerate(entries.values()): + sort_key = 0 + # Nth bit of sort_key is set if this object is dirty in Nth process. + for idx, (k, v) in enumerate(entries): if obj in v: - str_marker += chr(ord('A') + idx) - marker = (marker << 1) | 1 + sort_key = (sort_key << 1) | 1 else: - str_marker += '_' - marker = marker << 1 + sort_key = sort_key << 1 + + sort_keys[sort_key].append(obj) + + sort_keys = sorted(sort_keys.items()) - if with_sort: - dirty_image_objects.append(obj + ' ' + str(marker) + '\n') - else: - dirty_image_objects.append(obj + '\n') + if sort_type == SortType.OPT_NEIGHBOURS: + sort_keys = opt_neighbours(sort_keys) - mark_counts[str_marker] += 1 + dirty_obj_lines = list() + for idx, (_, objs) in enumerate(sort_keys): + for obj in objs: + dirty_obj_lines.append(obj + ' ' + str(idx) + '\n') - return (dirty_image_objects, mark_counts) + return (dirty_obj_lines, sort_keys) def main(): @@ -62,10 +115,23 @@ def main(): help='imgdiag files to use.', ) parser.add_argument( - '--sort-objects', + '--sort-type', + choices=[e.value for e in SortType], + default=SortType.OPT_NEIGHBOURS.value, + help=( + 'Object sorting type. "simple" puts objects with the same usage' + ' pattern in the same bins. "opt_neighbours" also tries to put bins' + ' with similar usage patterns close to each other.' + ), + ) + parser.add_argument( + '--merge-same-procnames', action=argparse.BooleanOptionalAction, - default=True, - help='Use object sorting.', + default=False, + help=( + 'Merge dirty objects from files with the same process name (different' + ' pid). Files are expected to end with "_{pid}.txt"' + ), ) parser.add_argument( '--output-filename', @@ -81,49 +147,40 @@ def main(): args = parser.parse_args() - entries = dict() + entries = list() for path in args.imgdiag_files: with open(path) as f: lines = f.readlines() prefix = 'dirty_obj: ' lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l] - entries[path] = set(lines) - - if args.sort_objects and len(entries) > 32: - print( - 'WARNING: too many processes for sorting, using top 32 by number of' - ' dirty objects.' - ) - entries_list = sorted( - list(entries.items()), reverse=True, key=lambda x: len(x[1]) - ) - entries_list = entries_list[0:32] - entries = {k: v for (k, v) in entries_list} + entries.append((path, set(lines))) + + entries = sorted(entries, key=lambda x: len(x[1])) + + if args.merge_same_procnames: + entries = merge_same_procnames(entries) print('Using processes:') - for k, v in sorted(entries.items(), key=lambda x: len(x[1])): + for k, v in entries: print(f'{k}: {len(v)}') print() - dirty_image_objects, mark_counts = process_dirty_entries( - entries=entries, with_sort=args.sort_objects + dirty_image_objects, sort_keys = process_dirty_entries( + entries=entries, sort_type=SortType(args.sort_type) ) with open(args.output_filename, 'w') as f: f.writelines(dirty_image_objects) if args.print_stats: - mark_counts = sorted( - list(mark_counts.items()), key=lambda x: x[1], reverse=True - ) - - for i, path in enumerate(entries.keys()): - print(path, chr(ord('A') + i)) - + print(','.join(k for k, v in entries), ',obj_count') total_count = 0 - for marker, count in mark_counts: - print(marker, count) - total_count += count + for sort_key, objs in sort_keys: + bits_csv = ','.join( + '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries)) + ) + print(bits_csv, ',', len(objs)) + total_count += len(objs) print('total: ', total_count) diff --git a/imgdiag/run_imgdiag.py b/imgdiag/run_imgdiag.py index 6d035540be..a51e5ee926 100755 --- a/imgdiag/run_imgdiag.py +++ b/imgdiag/run_imgdiag.py @@ -21,14 +21,26 @@ import subprocess try: from tqdm import tqdm except: + def tqdm(x): return x ProcEntry = namedtuple('ProcEntry', 'pid, ppid, cmd, name, etc_args') -def get_mem_stats(zygote_pid, target_pid, target_name, imgdiag_path, boot_image, device_out_dir): - imgdiag_output_path = f'{device_out_dir}/imgdiag_{target_name}.txt' + +def get_mem_stats( + zygote_pid, + target_pid, + target_name, + imgdiag_path, + boot_image, + device_out_dir, + host_out_dir, +): + imgdiag_output_path = ( + f'{device_out_dir}/imgdiag_{target_name}_{target_pid}.txt' + ) cmd_collect = ( 'adb shell ' f'"{imgdiag_path} --zygote-diff-pid={zygote_pid} --image-diff-pid={target_pid} ' @@ -41,13 +53,15 @@ def get_mem_stats(zygote_pid, target_pid, target_name, imgdiag_path, boot_image, print('imgdiag call failed on:', target_pid, target_name) return - cmd_pull = f'adb pull {imgdiag_output_path} ./' + cmd_pull = f'adb pull {imgdiag_output_path} {host_out_dir}' subprocess.run(cmd_pull, shell=True, check=True, capture_output=True) def main(): parser = argparse.ArgumentParser( - description='Run imgdiag on selected processes and pull results from the device.', + description=( + 'Run imgdiag on selected processes and pull results from the device.' + ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( @@ -76,6 +90,11 @@ def main(): default='/data/local/tmp/imgdiag_out', help='Directory for imgdiag output files on the device.', ) + parser.add_argument( + '--host-out-dir', + default='./', + help='Directory for imgdiag output files on the host.', + ) args = parser.parse_args() @@ -104,6 +123,7 @@ def main(): subprocess.run( args=f'adb shell "mkdir -p {args.device_out_dir}"', check=True, shell=True ) + subprocess.run(args=f'mkdir -p {args.host_out_dir}', check=True, shell=True) for entry in tqdm(zygote_children): get_mem_stats( @@ -113,8 +133,9 @@ def main(): imgdiag_path=args.imgdiag, boot_image=args.boot_image, device_out_dir=args.device_out_dir, + host_out_dir=args.host_out_dir, ) if __name__ == '__main__': - main() + main() |