blob: da0ed2c918a533cce761318465eaeb849419c84f [file] [log] [blame]
#! /usr/bin/env python3
#
# Copyright 2023, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from collections import defaultdict
from enum import Enum
import os
import re
class SortType(Enum):
    """Strategies for ordering dirty objects in the generated file.

    Each member's value is the string accepted by the ``--sort-type``
    command-line option.
    """

    # Objects are written in sorted order without a bin index.
    NONE = 'none'
    # Objects with the same usage pattern share a bin; bins are ordered by
    # their sort key.
    SIMPLE = 'simple'
    # Like SIMPLE, but bins are additionally reordered so that bins with
    # similar usage patterns are placed next to each other.
    OPT_NEIGHBOURS = 'opt_neighbours'
def merge_same_procnames(entries):
    """Merge dirty-object sets of files that share a process name.

    A file whose basename has the form ``{procname}_{pid}.txt`` contributes
    its objects to the ``{procname}`` entry. A file whose basename does not
    follow that form is kept as an entry of its own, keyed by its basename,
    instead of being silently discarded.

    Args:
        entries: iterable of ``(path, objs)`` pairs, where ``objs`` is an
            iterable of dirty object names.

    Returns:
        A list of ``(name, set_of_objs)`` pairs sorted by ascending number
        of objects.
    """
    # The dot is escaped and the whole basename must match, so only a
    # literal '_{pid}.txt' suffix is recognised as a pid suffix.
    procname_regex = re.compile(r'(.+)_(\d+)\.txt')
    merged_entries = defaultdict(set)
    for path, objs in entries:
        basename = os.path.basename(path)
        m = procname_regex.fullmatch(basename)
        name = m.group(1) if m else basename
        merged_entries[name].update(objs)
    return sorted(merged_entries.items(), key=lambda item: len(item[1]))
def opt_neighbours(sort_keys):
sort_keys = dict(sort_keys)
res = list()
# Start with a bin with the lowest process and objects count.
cur_key = min(
sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1]))
)[0]
res.append((cur_key, sort_keys[cur_key]))
del sort_keys[cur_key]
# Find next most similar sort key and update the result.
while sort_keys:
def jaccard_index(x):
return (x & cur_key).bit_count() / (x | cur_key).bit_count()
next_key = max(sort_keys.keys(), key=jaccard_index)
res.append((next_key, sort_keys[next_key]))
del sort_keys[next_key]
cur_key = next_key
return res
def process_dirty_entries(entries, sort_type):
    """Build the output lines for the dirty-image-objects file.

    Args:
        entries: sequence of ``(name, objs)`` pairs, one per process, where
            ``objs`` is a collection of dirty object names. It is iterated
            more than once, so it must not be a one-shot iterator.
        sort_type: a ``SortType`` member selecting how objects are ordered.

    Returns:
        A ``(lines, sort_keys)`` tuple. For ``SortType.NONE``, ``lines``
        holds every object once, in sorted order, and ``sort_keys`` is an
        empty dict. Otherwise ``lines`` holds ``'{obj} {bin_index}\\n'``
        strings and ``sort_keys`` is the ordered list of
        ``(sort_key, objs)`` bins those indices refer to.
    """
    union = set()
    for _, objs in entries:
        union.update(objs)

    if sort_type == SortType.NONE:
        dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
        return (dirty_obj_lines, dict())

    # sort_key -> [objs]
    bins = defaultdict(list)
    # Iterate in sorted order so that the order of objects inside each bin,
    # and therefore the output, is reproducible across runs; the iteration
    # order of a set of strings varies with hash randomization.
    for obj in sorted(union):
        sort_key = 0
        # One bit per process, set if this object is dirty in that process.
        # The first entry ends up in the most significant bit, so the binary
        # form of the key reads left to right in entry order.
        for _, objs in entries:
            sort_key = (sort_key << 1) | int(obj in objs)
        bins[sort_key].append(obj)

    sort_keys = sorted(bins.items())
    if sort_type == SortType.OPT_NEIGHBOURS:
        sort_keys = opt_neighbours(sort_keys)

    # Each object is tagged with the index of the bin it belongs to.
    dirty_obj_lines = [
        obj + ' ' + str(idx) + '\n'
        for idx, (_, objs) in enumerate(sort_keys)
        for obj in objs
    ]
    return (dirty_obj_lines, sort_keys)
def _parse_args():
    """Parse and return the command-line arguments of this tool."""
    parser = argparse.ArgumentParser(
        description=(
            'Create dirty-image-objects file from specified imgdiag output files.'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'imgdiag_files',
        nargs='+',
        help='imgdiag files to use.',
    )
    parser.add_argument(
        '--sort-type',
        choices=[e.value for e in SortType],
        default=SortType.OPT_NEIGHBOURS.value,
        help=(
            'Object sorting type. "simple" puts objects with the same usage'
            ' pattern in the same bins. "opt_neighbours" also tries to put bins'
            ' with similar usage patterns close to each other.'
        ),
    )
    parser.add_argument(
        '--merge-same-procnames',
        action=argparse.BooleanOptionalAction,
        default=False,
        help=(
            'Merge dirty objects from files with the same process name (different'
            ' pid). Files are expected to end with "_{pid}.txt"'
        ),
    )
    parser.add_argument(
        '--output-filename',
        default='dirty-image-objects.txt',
        help='Output file for dirty image objects.',
    )
    parser.add_argument(
        '--print-stats',
        action=argparse.BooleanOptionalAction,
        default=False,
        help='Print dirty object stats.',
    )
    return parser.parse_args()


def _read_dirty_objects(path, prefix='dirty_obj: '):
    """Return the set of object names found after `prefix` in file `path`."""
    dirty_objs = set()
    with open(path) as f:
        for line in f:
            if prefix not in line:
                continue
            # Keep only the text after the marker. Filtering and extraction
            # use the same rule, so a line that has other text before the
            # marker no longer ends up stored whole as an object name.
            name = line.partition(prefix)[2].strip()
            if name:
                dirty_objs.add(name)
    return dirty_objs


def _print_stats(entries, sort_keys):
    """Print one row per bin (per-process bits and object count) and a total."""
    print(','.join(k for k, v in entries), ',obj_count')
    total_count = 0
    width = len(entries)
    for sort_key, objs in sort_keys:
        # Joining the characters of the zero-padded binary string yields one
        # comma-separated column per process.
        bits_csv = ','.join(f'{sort_key:0{width}b}')
        print(bits_csv, ',', len(objs))
        total_count += len(objs)
    print('total: ', total_count)


def main():
    """Command-line entry point.

    Reads the given imgdiag output files, optionally merges files belonging
    to the same process name, writes the dirty-image-objects file and, when
    requested, prints per-bin statistics.
    """
    args = _parse_args()

    entries = [(path, _read_dirty_objects(path)) for path in args.imgdiag_files]
    # Processes with fewer dirty objects come first.
    entries.sort(key=lambda entry: len(entry[1]))
    if args.merge_same_procnames:
        entries = merge_same_procnames(entries)

    print('Using processes:')
    for name, objs in entries:
        print(f'{name}: {len(objs)}')
    print()

    dirty_image_objects, sort_keys = process_dirty_entries(
        entries=entries, sort_type=SortType(args.sort_type)
    )
    with open(args.output_filename, 'w') as f:
        f.writelines(dirty_image_objects)

    if args.print_stats:
        _print_stats(entries, sort_keys)
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()