| #! /usr/bin/env python3 |
| # |
| # Copyright 2023, The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import argparse |
| from collections import defaultdict |
| from enum import Enum |
| import os |
| import re |
| |
| |
class SortType(Enum):
    """Strategies for ordering dirty objects in the generated file.

    The member values are the strings accepted by the --sort-type flag.
    """

    # Plain alphabetical listing, no bins.
    NONE = 'none'
    # Objects with the same usage pattern share a bin.
    SIMPLE = 'simple'
    # Like SIMPLE, and bins with similar usage patterns are placed close together.
    OPT_NEIGHBOURS = 'opt_neighbours'
| |
| |
def merge_same_procnames(entries):
    """Merge dirty-object sets of files that belong to the same process name.

    Files whose basename looks like ``<procname>_<pid>.txt`` are grouped by
    ``<procname>`` and their object sets are unioned. Files whose basename does
    not follow that pattern are kept under their own basename, so their objects
    are not silently lost.

    Args:
        entries: iterable of ``(path, objs)`` pairs, where ``objs`` is an
            iterable of dirty object names read from ``path``.

    Returns:
        A list of ``(name, set_of_objs)`` pairs sorted by ascending number of
        objects.
    """
    # The dot is escaped and the whole basename must match, so names such as
    # "proc_12xtxt" or "proc_12.txt.bak" are not mistaken for "<name>_<pid>.txt".
    prog = re.compile(r'(.+)_(\d+)\.txt')

    merged_entries = defaultdict(set)
    for path, objs in entries:
        basename = os.path.basename(path)
        m = prog.fullmatch(basename)
        key = m.group(1) if m else basename
        merged_entries[key].update(objs)

    return sorted(merged_entries.items(), key=lambda x: len(x[1]))
| |
| |
| def opt_neighbours(sort_keys): |
| sort_keys = dict(sort_keys) |
| res = list() |
| |
| # Start with a bin with the lowest process and objects count. |
| cur_key = min( |
| sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1])) |
| )[0] |
| res.append((cur_key, sort_keys[cur_key])) |
| del sort_keys[cur_key] |
| |
| # Find next most similar sort key and update the result. |
| while sort_keys: |
| |
| def jaccard_index(x): |
| return (x & cur_key).bit_count() / (x | cur_key).bit_count() |
| |
| next_key = max(sort_keys.keys(), key=jaccard_index) |
| res.append((next_key, sort_keys[next_key])) |
| del sort_keys[next_key] |
| cur_key = next_key |
| return res |
| |
| |
def process_dirty_entries(entries, sort_type):
    """Turn per-process dirty object sets into output lines and bins.

    Args:
        entries: sequence of ``(name, objs)`` pairs; ``objs`` is the collection
            of dirty object names for that process. It is iterated several
            times, so it must not be a one-shot iterator.
        sort_type: a ``SortType`` member selecting how objects are ordered.

    Returns:
        A ``(dirty_obj_lines, sort_keys)`` tuple. For ``SortType.NONE`` the
        lines are the sorted object names, each followed by a newline, and
        ``sort_keys`` is an empty dict. Otherwise each line is
        ``"<obj> <bin index>\n"`` and ``sort_keys`` is the list of
        ``(sort_key, objs)`` bins in output order.
    """
    union = set()
    for _, objs in entries:
        union.update(objs)

    if sort_type == SortType.NONE:
        dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
        return (dirty_obj_lines, dict())

    # sort_key -> [objs]
    sort_keys = defaultdict(list)
    # Iterate in sorted order: set iteration order of strings changes between
    # runs (hash randomization), which would make the output file unstable.
    for obj in sorted(union):
        sort_key = 0
        # One bit per process, set if this object is dirty in that process.
        # The first entry ends up in the most significant bit.
        for _, objs in entries:
            sort_key = (sort_key << 1) | int(obj in objs)

        sort_keys[sort_key].append(obj)

    # Keys are unique, so the object lists are never compared while sorting.
    sort_keys = sorted(sort_keys.items())

    if sort_type == SortType.OPT_NEIGHBOURS:
        sort_keys = opt_neighbours(sort_keys)

    dirty_obj_lines = [
        obj + ' ' + str(idx) + '\n'
        for idx, (_, objs) in enumerate(sort_keys)
        for obj in objs
    ]

    return (dirty_obj_lines, sort_keys)
| |
| |
def main():
    """Command-line entry point.

    Reads the given imgdiag output files, collects the objects listed on lines
    containing ``dirty_obj: ``, orders them according to --sort-type and writes
    the result to --output-filename. Optionally prints per-bin statistics.
    """
    arg_parser = argparse.ArgumentParser(
        description=(
            'Create dirty-image-objects file from specified imgdiag output files.'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument('imgdiag_files', nargs='+', help='imgdiag files to use.')
    arg_parser.add_argument(
        '--sort-type',
        choices=[member.value for member in SortType],
        default=SortType.OPT_NEIGHBOURS.value,
        help=(
            'Object sorting type. "simple" puts objects with the same usage'
            ' pattern in the same bins. "opt_neighbours" also tries to put bins'
            ' with similar usage patterns close to each other.'
        ),
    )
    arg_parser.add_argument(
        '--merge-same-procnames',
        action=argparse.BooleanOptionalAction,
        default=False,
        help=(
            'Merge dirty objects from files with the same process name (different'
            ' pid). Files are expected to end with "_{pid}.txt"'
        ),
    )
    arg_parser.add_argument(
        '--output-filename',
        default='dirty-image-objects.txt',
        help='Output file for dirty image objects.',
    )
    arg_parser.add_argument(
        '--print-stats',
        action=argparse.BooleanOptionalAction,
        default=False,
        help='Print dirty object stats.',
    )
    args = arg_parser.parse_args()

    # Collect the set of dirty objects reported by each input file.
    marker = 'dirty_obj: '
    entries = []
    for path in args.imgdiag_files:
        with open(path) as imgdiag_file:
            dirty_objs = {
                line.strip().removeprefix(marker)
                for line in imgdiag_file
                if marker in line
            }
        entries.append((path, dirty_objs))

    # Processes with fewer dirty objects come first.
    entries.sort(key=lambda entry: len(entry[1]))

    if args.merge_same_procnames:
        entries = merge_same_procnames(entries)

    print('Using processes:')
    for name, objs in entries:
        print(f'{name}: {len(objs)}')
    print()

    dirty_image_objects, sort_keys = process_dirty_entries(
        entries=entries, sort_type=SortType(args.sort_type)
    )

    with open(args.output_filename, 'w') as out_file:
        out_file.writelines(dirty_image_objects)

    if not args.print_stats:
        return

    # CSV-like table: one column per process, then the object count of the bin.
    print(','.join(name for name, _ in entries), ',obj_count')
    total_count = 0
    bit_width = len(entries)
    for sort_key, objs in sort_keys:
        bits = format(sort_key, f'0{bit_width}b')
        print(','.join(bits), ',', len(objs))
        total_count += len(objs)
    print('total: ', total_count)
| |
| |
# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()