Update imgdiag scripts
run_imgdiag:
* Add PID to output filenames.
* Add option to specify host output directory.
create_dirty_image_objects:
* Add an option to merge imgdiag files with the same process names.
* Improve object sorting.
* Print stats in csv format.
Test: ./run_imgdiag.py --host-out-dir out
Test: ./create_dirty_image_objects.py ./out/imgdiag_* --merge-same-procnames --print-stats
Change-Id: I94aba7b241e0a0c2ebfb33ffcc333640c04f2d01
diff --git a/imgdiag/create_dirty_image_objects.py b/imgdiag/create_dirty_image_objects.py
index d090fcc..da0ed2c 100755
--- a/imgdiag/create_dirty_image_objects.py
+++ b/imgdiag/create_dirty_image_objects.py
@@ -16,37 +16,90 @@
import argparse
from collections import defaultdict
+from enum import Enum
import os
+import re
-def process_dirty_entries(entries, with_sort):
- mark_counts = defaultdict(int)
+class SortType(Enum):
+ NONE = 'none'
+ SIMPLE = 'simple'
+ OPT_NEIGHBOURS = 'opt_neighbours'
+
+
+def merge_same_procnames(entries):
+ path_regex = r'(.+)_(\d+).txt'
+ prog = re.compile(path_regex)
+
+ merged_entries = defaultdict(set)
+ for path, objs in entries:
+ basename = os.path.basename(path)
+ m = prog.match(basename)
+ if m:
+ merged_entries[m.group(1)].update(objs)
+
+ return sorted(merged_entries.items(), key=lambda x: len(x[1]))
+
+
+def opt_neighbours(sort_keys):
+ sort_keys = dict(sort_keys)
+ res = list()
+
+ # Start with a bin with the lowest process and objects count.
+ cur_key = min(
+ sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1]))
+ )[0]
+ res.append((cur_key, sort_keys[cur_key]))
+ del sort_keys[cur_key]
+
+ # Find next most similar sort key and update the result.
+ while sort_keys:
+
+ def jaccard_index(x):
+ return (x & cur_key).bit_count() / (x | cur_key).bit_count()
+
+ next_key = max(sort_keys.keys(), key=jaccard_index)
+ res.append((next_key, sort_keys[next_key]))
+ del sort_keys[next_key]
+ cur_key = next_key
+ return res
+
+
+def process_dirty_entries(entries, sort_type):
dirty_image_objects = []
union = set()
- for v in entries.values():
+ for k, v in entries:
union = union.union(v)
+ if sort_type == SortType.NONE:
+ dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
+ return (dirty_obj_lines, dict())
+
+ # sort_key -> [objs]
+ sort_keys = defaultdict(list)
for obj in union:
- str_marker = ''
- marker = 0
- # Sort marker is uint32_t, where Nth bit is set if Nth process has this object dirty.
- for idx, v in enumerate(entries.values()):
+ sort_key = 0
+ # Nth bit of sort_key is set if this object is dirty in Nth process.
+ for idx, (k, v) in enumerate(entries):
if obj in v:
- str_marker += chr(ord('A') + idx)
- marker = (marker << 1) | 1
+ sort_key = (sort_key << 1) | 1
else:
- str_marker += '_'
- marker = marker << 1
+ sort_key = sort_key << 1
- if with_sort:
- dirty_image_objects.append(obj + ' ' + str(marker) + '\n')
- else:
- dirty_image_objects.append(obj + '\n')
+ sort_keys[sort_key].append(obj)
- mark_counts[str_marker] += 1
+ sort_keys = sorted(sort_keys.items())
- return (dirty_image_objects, mark_counts)
+ if sort_type == SortType.OPT_NEIGHBOURS:
+ sort_keys = opt_neighbours(sort_keys)
+
+ dirty_obj_lines = list()
+ for idx, (_, objs) in enumerate(sort_keys):
+ for obj in objs:
+ dirty_obj_lines.append(obj + ' ' + str(idx) + '\n')
+
+ return (dirty_obj_lines, sort_keys)
def main():
@@ -62,10 +115,23 @@
help='imgdiag files to use.',
)
parser.add_argument(
- '--sort-objects',
+ '--sort-type',
+ choices=[e.value for e in SortType],
+ default=SortType.OPT_NEIGHBOURS.value,
+ help=(
+ 'Object sorting type. "simple" puts objects with the same usage'
+ ' pattern in the same bins. "opt_neighbours" also tries to put bins'
+ ' with similar usage patterns close to each other.'
+ ),
+ )
+ parser.add_argument(
+ '--merge-same-procnames',
action=argparse.BooleanOptionalAction,
- default=True,
- help='Use object sorting.',
+ default=False,
+ help=(
+ 'Merge dirty objects from files with the same process name (different'
+ ' pid). Files are expected to end with "_{pid}.txt"'
+ ),
)
parser.add_argument(
'--output-filename',
@@ -81,49 +147,40 @@
args = parser.parse_args()
- entries = dict()
+ entries = list()
for path in args.imgdiag_files:
with open(path) as f:
lines = f.readlines()
prefix = 'dirty_obj: '
lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l]
- entries[path] = set(lines)
+ entries.append((path, set(lines)))
- if args.sort_objects and len(entries) > 32:
- print(
- 'WARNING: too many processes for sorting, using top 32 by number of'
- ' dirty objects.'
- )
- entries_list = sorted(
- list(entries.items()), reverse=True, key=lambda x: len(x[1])
- )
- entries_list = entries_list[0:32]
- entries = {k: v for (k, v) in entries_list}
+ entries = sorted(entries, key=lambda x: len(x[1]))
+
+ if args.merge_same_procnames:
+ entries = merge_same_procnames(entries)
print('Using processes:')
- for k, v in sorted(entries.items(), key=lambda x: len(x[1])):
+ for k, v in entries:
print(f'{k}: {len(v)}')
print()
- dirty_image_objects, mark_counts = process_dirty_entries(
- entries=entries, with_sort=args.sort_objects
+ dirty_image_objects, sort_keys = process_dirty_entries(
+ entries=entries, sort_type=SortType(args.sort_type)
)
with open(args.output_filename, 'w') as f:
f.writelines(dirty_image_objects)
if args.print_stats:
- mark_counts = sorted(
- list(mark_counts.items()), key=lambda x: x[1], reverse=True
- )
-
- for i, path in enumerate(entries.keys()):
- print(path, chr(ord('A') + i))
-
+ print(','.join(k for k, v in entries), ',obj_count')
total_count = 0
- for marker, count in mark_counts:
- print(marker, count)
- total_count += count
+ for sort_key, objs in sort_keys:
+ bits_csv = ','.join(
+ '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries))
+ )
+ print(bits_csv, ',', len(objs))
+ total_count += len(objs)
print('total: ', total_count)
diff --git a/imgdiag/run_imgdiag.py b/imgdiag/run_imgdiag.py
index 6d03554..a51e5ee 100755
--- a/imgdiag/run_imgdiag.py
+++ b/imgdiag/run_imgdiag.py
@@ -21,14 +21,26 @@
try:
from tqdm import tqdm
except:
+
def tqdm(x):
return x
ProcEntry = namedtuple('ProcEntry', 'pid, ppid, cmd, name, etc_args')
-def get_mem_stats(zygote_pid, target_pid, target_name, imgdiag_path, boot_image, device_out_dir):
- imgdiag_output_path = f'{device_out_dir}/imgdiag_{target_name}.txt'
+
+def get_mem_stats(
+ zygote_pid,
+ target_pid,
+ target_name,
+ imgdiag_path,
+ boot_image,
+ device_out_dir,
+ host_out_dir,
+):
+ imgdiag_output_path = (
+ f'{device_out_dir}/imgdiag_{target_name}_{target_pid}.txt'
+ )
cmd_collect = (
'adb shell '
f'"{imgdiag_path} --zygote-diff-pid={zygote_pid} --image-diff-pid={target_pid} '
@@ -41,13 +53,15 @@
print('imgdiag call failed on:', target_pid, target_name)
return
- cmd_pull = f'adb pull {imgdiag_output_path} ./'
+ cmd_pull = f'adb pull {imgdiag_output_path} {host_out_dir}'
subprocess.run(cmd_pull, shell=True, check=True, capture_output=True)
def main():
parser = argparse.ArgumentParser(
- description='Run imgdiag on selected processes and pull results from the device.',
+ description=(
+ 'Run imgdiag on selected processes and pull results from the device.'
+ ),
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
@@ -76,6 +90,11 @@
default='/data/local/tmp/imgdiag_out',
help='Directory for imgdiag output files on the device.',
)
+ parser.add_argument(
+ '--host-out-dir',
+ default='./',
+ help='Directory for imgdiag output files on the host.',
+ )
args = parser.parse_args()
@@ -104,6 +123,7 @@
subprocess.run(
args=f'adb shell "mkdir -p {args.device_out_dir}"', check=True, shell=True
)
+ subprocess.run(args=f'mkdir -p {args.host_out_dir}', check=True, shell=True)
for entry in tqdm(zygote_children):
get_mem_stats(
@@ -113,8 +133,9 @@
imgdiag_path=args.imgdiag,
boot_image=args.boot_image,
device_out_dir=args.device_out_dir,
+ host_out_dir=args.host_out_dir,
)
if __name__ == '__main__':
- main()
+ main()