summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jihoon Kang <jihoonkang@google.com> 2022-10-28 22:21:42 +0000
committer Jihoon Kang <jihoonkang@google.com> 2022-10-28 22:45:08 +0000
commit 3d38b6d9c803f6b04d71f620a0c1417a1c7973cb (patch)
tree 7a65d65b614d424313594050a4426514ec4bbebe
parent 03b846ff370e9d4b3e7adf34c363837425f4611d (diff)
Create custom diff tool to compare stub contents
Context:
- Create a tool to analyze loose equivalence of the stubs in two directories.
- The tool can also analyze strict equivalence of the directories of the stubs in the two directories.
- Analyze text to compare loose equivalence of the stub contents; add functionality to pass `skip_words` as an argument, an optional list of words that tells the tool which differing words are not considered a diff.
- The tool can be used locally to compare stub contents, and does not contribute to the build process.

Test: m
Change-Id: I74563a9a24ecdde939be2ce37b9096a9aeb4920a
-rw-r--r-- tools/stub_diff_analyzer.py 328
1 files changed, 328 insertions, 0 deletions
diff --git a/tools/stub_diff_analyzer.py b/tools/stub_diff_analyzer.py
new file mode 100644
index 0000000000..e49d092311
--- /dev/null
+++ b/tools/stub_diff_analyzer.py
@@ -0,0 +1,328 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sys import exit
+from typing import List
+from glob import glob
+from pathlib import Path
+from collections import defaultdict
+from difflib import Differ
+from re import split
+from tqdm import tqdm
+import argparse
+
+
# Every line produced by difflib.Differ.compare() starts with a two-character
# code; this is the length of that prefix.
DIFFER_CODE_LEN = 2


class DifferCodes:
    """Two-character line prefixes emitted by difflib.Differ.compare().

    Each value must be exactly DIFFER_CODE_LEN characters long, because the
    code of a diff line is obtained by slicing its first DIFFER_CODE_LEN
    characters and comparing the slice against these constants.
    """
    # Line common to both sequences (two spaces, not one).
    COMMON = '  '
    # Line unique to the first sequence.
    UNIQUE_FIRST = '- '
    # Line unique to the second sequence.
    UNIQUE_SECOND = '+ '
    # Hint line marking intraline differences; not present in either input.
    DIFF_IDENT = '? '
+
class FilesDiffAnalyzer:
    """Compare the files found under two directories and report the differences.

    On construction the files of both directories are listed and indexed by
    their path relative to the directory they were found in. analyze() then
    walks that index: files present in both directories are diffed line by
    line, files present in only one directory are reported as missing from
    the other.
    """

    def __init__(self, args) -> None:
        """Store the options found on ``args`` and index both directories.

        Args:
            args: Object exposing the attributes out_dir, show_diff,
                skip_words, first_dir, second_dir and include_common
                (the argparse namespace built in this file's __main__ section).

        Raises:
            FileNotFoundError: Propagated from get_files() when one of the
                directories does not exist.
        """
        self.out_dir = args.out_dir
        self.show_diff = args.show_diff
        self.skip_words = args.skip_words
        self.first_dir = args.first_dir
        self.second_dir = args.second_dir
        self.include_common = args.include_common

        self.first_dir_files = self.get_files(self.first_dir)
        self.second_dir_files = self.get_files(self.second_dir)
        # Maps a relative file name to the set of directories that contain it.
        self.common_file_map = defaultdict(set)

        self.map_common_files(self.first_dir_files, self.first_dir)
        self.map_common_files(self.second_dir_files, self.second_dir)

    @staticmethod
    def _relative_name(file: str, dir: str) -> str:
        """Return ``file`` with the ``dir`` prefix and leading slashes removed."""
        remainder = file[len(dir):] if file.startswith(dir) else file
        # Strip the separator so "a" and "a/" produce the same key.
        return remainder.lstrip("/")

    @staticmethod
    def _full_path(dir: str, name: str) -> str:
        """Join a directory string and a relative file name with a single '/'."""
        return (dir if dir.endswith("/") else dir + "/") + name

    def get_files(self, dir: str) -> List[str]:
        """Get the paths of all files in the input directory, including subdirectories.

        Recursively finds all files in the input directory.
        Returns a list of file path strings; directories themselves are not included.
        The list is sorted in alphabetical order of the file paths.

        Args:
            dir: Directory to get the files from. String.

        Returns:
            A list of file path strings within the input directory,
            sorted in alphabetical order.

        Raises:
            FileNotFoundError: The input directory does not exist.
        """
        if not dir_exists(dir):
            raise FileNotFoundError("Directory does not exist")

        # Turn the directory into a recursive glob pattern "<dir>/**".
        # The suffix (not the prefix) decides whether the pieces are present.
        if not dir.endswith("**"):
            if not dir.endswith("/"):
                dir += "/"
            dir += "**"

        return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()]

    def map_common_files(self, files: List[str], dir: str) -> None:
        """Record, for every file, that its relative name exists under ``dir``.

        The key stored in common_file_map is the file path relative to ``dir``
        without a leading '/', so the same file is keyed identically whether
        or not the directories were given with a trailing slash.

        Args:
            files: File paths that start with ``dir``.
            dir: Directory string the files were listed from.
        """
        for file in files:
            self.common_file_map[self._relative_name(file, dir)].add(dir)

    def _paired_line_offset(self, diff: List[str], idx: int) -> int:
        """Return the offset from a '- ' line at ``idx`` to its '+ ' counterpart.

        Returns 1 when the next line is a '+ ' line, 2 when a '? ' hint line
        sits between the '- ' line and a '+ ' line, and 0 when there is no
        counterpart to compare against.
        """
        if idx + 1 >= len(diff):
            return 0
        next_line = diff[idx + 1]
        if next_line.startswith(DifferCodes.UNIQUE_SECOND):
            return 1
        if (next_line.startswith(DifferCodes.DIFF_IDENT)
                and idx + 2 < len(diff)
                and diff[idx + 2].startswith(DifferCodes.UNIQUE_SECOND)):
            return 2
        return 0

    def compare_file_contents(self, first_file: str, second_file: str) -> List[str]:
        """Compare the contents of the files and return the differing lines.

        Both files are read, passed through sort_methods() and compared with
        difflib.Differ. The result starts with a "diff <first> <second>" header
        followed by content lines prefixed with an identifier code:
            - '  ' (two space characters): Line common to both files
                                           (only emitted when include_common is set)
            - '- ' (minus followed by a space): Line unique to first file
            - '+ ' (plus followed by a space): Line unique to second file
        A '- ' line directly followed by its '+ ' counterpart is checked with
        lines_differ(); if the pair only differs in skip words it is treated
        as a common line. '? ' hint lines are never emitted.

        Args:
            first_file: Path of the first file to compare
            second_file: Path of the second file to compare

        Returns:
            A list of strings. For example:

            [
                "diff a/Foo.java b/Foo.java",
                "  Foo",
                "- Bar",
                "+ Baz"
            ]
        """
        first_file_contents = sort_methods(get_file_contents(first_file))
        second_file_contents = sort_methods(get_file_contents(second_file))
        diff = list(Differ().compare(first_file_contents, second_file_contents))
        ret = [f"diff {first_file} {second_file}"]

        idx = 0
        while idx < len(diff):
            line = diff[idx]
            line_code = line[:DIFFER_CODE_LEN]

            if line_code == DifferCodes.COMMON:
                if self.include_common:
                    ret.append(line)

            elif line_code == DifferCodes.UNIQUE_FIRST:
                delta = self._paired_line_offset(diff, idx)
                if delta:
                    line_to_compare = diff[idx + delta]
                    if self.lines_differ(line, line_to_compare):
                        ret.extend([line, line_to_compare])
                    elif self.include_common:
                        # Only skip words differ: report the pair as one common line.
                        ret.append(DifferCodes.COMMON + line[DIFFER_CODE_LEN:])
                    # Jump over the lines consumed together with this one.
                    idx += delta
                else:
                    ret.append(line)

            elif line_code == DifferCodes.UNIQUE_SECOND:
                ret.append(line)

            # DifferCodes.DIFF_IDENT ('? ') hint lines are intentionally dropped.
            idx += 1
        return ret

    def lines_differ(self, line1: str, line2: str) -> bool:
        """Check if the input lines are different or not.

        Compares the two diff lines (code prefix removed) word by word, where
        words are separated by whitespace or '.'. A pair of unequal words is
        ignored when any non-empty skip word is a substring of either word.

        Args:
            line1: first diff line to compare, including its code prefix
            line2: second diff line to compare, including its code prefix

        Returns:
            True if the word counts differ or an unequal word pair is not
            covered by skip_words; False otherwise.
        """
        # Split by '.' or whitespace, after dropping the Differ code prefix.
        def split_words(line: str) -> List[str]:
            return split(r'\s|\.', line[DIFFER_CODE_LEN:])

        line1_words, line2_words = split_words(line1), split_words(line2)
        if len(line1_words) != len(line2_words):
            return True

        for word1, word2 in zip(line1_words, line2_words):
            if word1 == word2:
                continue
            # Substring match on purpose: the words need not equal a skip word.
            # Empty skip words are ignored; "" is a substring of every word and
            # would otherwise hide every difference.
            if not any(sw and (sw in word1 or sw in word2) for sw in self.skip_words):
                return True

        return False

    def analyze(self) -> None:
        """Analyze file contents in both directories and write to output or console."""
        both_dirs = {self.first_dir, self.second_dir}
        for file in tqdm(sorted(self.common_file_map)):
            dirs_with_file = self.common_file_map[file]

            if dirs_with_file == both_dirs:
                # File exists in both directories: diff the contents.
                lines = self.compare_file_contents(
                    self._full_path(self.first_dir, file),
                    self._full_path(self.second_dir, file))
            else:
                existing_dir, not_existing_dir = (
                    (self.first_dir, self.second_dir) if self.first_dir in dirs_with_file
                    else (self.second_dir, self.first_dir))
                existing_path = self._full_path(existing_dir, file)

                lines = [f"{self._full_path(not_existing_dir, file)} does not exist."]

                if self.show_diff:
                    lines.append(f"Content of {existing_path}: \n")
                    lines.extend(get_file_contents(existing_path))

            self.write(lines)

    def write(self, lines: List[str]) -> None:
        """Print ``lines`` to the console, or append them to out_dir when it is set."""
        if not self.out_dir:
            pprint(lines)
        else:
            write_lines(self.out_dir, lines)
+
+###
+# Helper functions
+###
+
def sort_methods(lines: List[str]) -> List[str]:
    """Sort class methods in the file contents by alphabetical order

    Given lines of Java file contents, return lines with class methods sorted in alphabetical order.
    Also omit empty lines or lines with spaces.
    For example:
        l = [
            "package android.test;",
            "",
            "public static final int ORANGE = 1;",
            "",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "}"
        ]
        sort_methods(l) returns
        [
            "package android.test;",
            "public static final int ORANGE = 1;",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "}"
        ]

    A class body starts after a line containing the substring "class" and
    ends at the next line whose first character is "}". If the input ends
    before such a closing line, the lines collected so far are still emitted
    (sorted) instead of being dropped.

    Args:
        lines: List of strings consisted of Java file contents.

    Returns:
        A list of string with sorted class methods.

    """
    def is_not_blank(l: str) -> bool:
        return bool(l.strip())

    ret = []
    in_class = False
    # Non-blank lines of the class body currently being collected.
    buffer = []

    for line in lines:
        if not in_class:
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Static variables, package info, etc.; blank lines are skipped.
                ret.append(line)
        elif line.startswith("}"):
            # End of class: emit the sorted body, then the closing brace line.
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # Input ended inside a class body (no closing "}" at column 0):
    # keep the collected lines rather than silently losing them.
    ret.extend(sorted(buffer))
    return ret
+
def get_file_contents(file_path: str) -> List[str]:
    """Read a text file and return its lines without the trailing newline.

    Args:
        file_path: Path of the file to read.

    Returns:
        The lines of the file, in order, with trailing '\\n' characters removed.
        Other whitespace is kept as is.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_path) as f:
        return [line.rstrip('\n') for line in f]
+
def pprint(l: List[str]) -> None:
    """Print every entry of ``l`` to standard output, one entry per line."""
    entries = [str(entry) for entry in l]
    # Nothing is printed for an empty list (not even a blank line).
    if entries:
        print("\n".join(entries))
+
def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append ``lines`` to the file at ``out_dir``, followed by one blank line.

    Args:
        out_dir: Path of the file to append to (created if it does not exist).
        lines: Strings to write; a newline is added after each one.
    """
    # Append mode: successive calls accumulate in the same file.
    # The with-statement closes the file; no explicit close() is needed.
    with open(out_dir, "a") as f:
        f.writelines(line + '\n' for line in lines)
        # Blank separator line between successive groups of lines.
        f.write("\n")
+
def dir_exists(dir: str) -> bool:
    """Return True if ``dir`` is the path of an existing directory.

    A path that exists but is a regular file is rejected, so callers do not
    go on to list the "contents" of something that is not a directory.
    """
    return Path(dir).is_dir()
+
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the analysis.
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', type=str,
                        help="second path to compare file directory and contents")
    parser.add_argument('--out', dest='out_dir',
                        default="", type=str,
                        help="optional directory to write log. If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction, default=False,
                        help="optional flag. If passed, will print out the content of "
                             "the file unique to each directories")
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction, default=False,
                        help="optional flag. If passed, will print out the contents "
                             "common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[], help="optional words to skip in comparison")

    args = parser.parse_args()

    # argparse already rejects missing positionals; this catches empty strings.
    # parser.error() prints the usage and exits with a non-zero status, so a
    # calling script can tell that nothing was analyzed.
    if not args.first_dir or not args.second_dir:
        parser.error("first_dir and second_dir must be non-empty paths")

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()