| #!/usr/bin/env python |
| # |
| # Copyright (C) 2022 The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| from sys import exit |
| from typing import List |
| from glob import glob |
| from pathlib import Path |
| from collections import defaultdict |
| from difflib import Differ |
| from re import split |
| from tqdm import tqdm |
| import argparse |
| |
| |
# Number of leading characters difflib.Differ uses as the per-line code.
DIFFER_CODE_LEN = 2


class DifferCodes:
    """Two-character prefixes that ``difflib.Differ.compare`` puts on each line.

    Every code must be exactly DIFFER_CODE_LEN characters long, because callers
    classify a diff line by comparing ``line[:DIFFER_CODE_LEN]`` to these values.
    """

    # Line common to both sequences (two spaces, not one).
    COMMON = '  '
    # Line unique to the first sequence.
    UNIQUE_FIRST = '- '
    # Line unique to the second sequence.
    UNIQUE_SECOND = '+ '
    # Hint line produced by Differ; not present in either input.
    DIFF_IDENT = '? '
| |
| class FilesDiffAnalyzer: |
| def __init__(self, args) -> None: |
| self.out_dir = args.out_dir |
| self.show_diff = args.show_diff |
| self.skip_words = args.skip_words |
| self.first_dir = args.first_dir |
| self.second_dir = args.second_dir |
| self.include_common = args.include_common |
| |
| self.first_dir_files = self.get_files(self.first_dir) |
| self.second_dir_files = self.get_files(self.second_dir) |
| self.common_file_map = defaultdict(set) |
| |
| self.map_common_files(self.first_dir_files, self.first_dir) |
| self.map_common_files(self.second_dir_files, self.second_dir) |
| |
| def get_files(self, dir: str) -> List[str]: |
| """Get all files directory in the input directory including the files in the subdirectories |
| |
| Recursively finds all files in the input directory. |
| Returns a list of file directory strings, which do not include directories but only files. |
| List is sorted in alphabetical order of the file directories. |
| |
| Args: |
| dir: Directory to get the files. String. |
| |
| Returns: |
| A list of file directory strings within the input directory. |
| Sorted in Alphabetical order. |
| |
| Raises: |
| FileNotFoundError: An error occurred accessing the non-existing directory |
| """ |
| |
| if not dir_exists(dir): |
| raise FileNotFoundError("Directory does not exist") |
| |
| if dir[:-2] != "**": |
| if dir[:-1] != "/": |
| dir += "/" |
| dir += "**" |
| |
| return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()] |
| |
| def map_common_files(self, files: List[str], dir: str) -> None: |
| for file in files: |
| file_name = file.split(dir, 1)[-1] |
| self.common_file_map[file_name].add(dir) |
| return |
| |
| def compare_file_contents(self, first_file: str, second_file: str) -> List[str]: |
| """Compare the contents of the files and return different lines |
| |
| Given two file directory strings, compare the contents of the two files |
| and return the list of file contents string prepended with unique identifier codes. |
| The identifier codes include: |
| - ' '(two empty space characters): Line common to two files |
| - '- '(minus followed by a space) : Line unique to first file |
| - '+ '(plus followed by a space) : Line unique to second file |
| |
| Args: |
| first_file: First file directory string to compare the content |
| second_file: Second file directory string to compare the content |
| |
| Returns: |
| A list of the file content strings. For example: |
| |
| [ |
| " Foo", |
| "- Bar", |
| "+ Baz" |
| ] |
| """ |
| |
| d = Differ() |
| first_file_contents = sort_methods(get_file_contents(first_file)) |
| second_file_contents = sort_methods(get_file_contents(second_file)) |
| diff = list(d.compare(first_file_contents, second_file_contents)) |
| ret = [f"diff {first_file} {second_file}"] |
| |
| idx = 0 |
| while idx < len(diff): |
| line = diff[idx] |
| line_code = line[:DIFFER_CODE_LEN] |
| |
| match line_code: |
| case DifferCodes.COMMON: |
| if self.include_common: |
| ret.append(line) |
| |
| case DifferCodes.UNIQUE_FIRST: |
| # Should compare line |
| if (idx < len(diff) - 1 and |
| (next_line_code := diff[idx + 1][:DIFFER_CODE_LEN]) |
| not in (DifferCodes.UNIQUE_FIRST, DifferCodes.COMMON)): |
| delta = 1 if next_line_code == DifferCodes.UNIQUE_SECOND else 2 |
| line_to_compare = diff[idx + delta] |
| if self.lines_differ(line, line_to_compare): |
| ret.extend([line, line_to_compare]) |
| else: |
| if self.include_common: |
| ret.append(DifferCodes.COMMON + |
| line[DIFFER_CODE_LEN:]) |
| idx += delta |
| else: |
| ret.append(line) |
| |
| case DifferCodes.UNIQUE_SECOND: |
| ret.append(line) |
| |
| case DifferCodes.DIFF_IDENT: |
| pass |
| idx += 1 |
| return ret |
| |
| def lines_differ(self, line1: str, line2: str) -> bool: |
| """Check if the input lines are different or not |
| |
| Compare the two lines word by word and check if the two lines are different or not. |
| If the different words in the comparing lines are included in skip_words, |
| the lines are not considered different. |
| |
| Args: |
| line1: first line to compare |
| line2: second line to compare |
| |
| Returns: |
| Boolean value indicating if the two lines are different or not |
| |
| """ |
| # Split by '.' or ' '(whitespace) |
| def split_words(line: str) -> List[str]: |
| return split('\\s|\\.', line[DIFFER_CODE_LEN:]) |
| |
| line1_words, line2_words = split_words(line1), split_words(line2) |
| if len(line1_words) != len(line2_words): |
| return True |
| |
| for word1, word2 in zip(line1_words, line2_words): |
| if word1 != word2: |
| # not check if words are equal to skip word, but |
| # check if words contain skip word as substring |
| if all(sw not in word1 and sw not in word2 for sw in self.skip_words): |
| return True |
| |
| return False |
| |
| def analyze(self) -> None: |
| """Analyze file contents in both directories and write to output or console. |
| """ |
| for file in tqdm(sorted(self.common_file_map.keys())): |
| val = self.common_file_map[file] |
| |
| # When file exists in both directories |
| lines = list() |
| if val == set([self.first_dir, self.second_dir]): |
| lines = self.compare_file_contents( |
| self.first_dir + file, self.second_dir + file) |
| else: |
| existing_dir, not_existing_dir = ( |
| (self.first_dir, self.second_dir) if self.first_dir in val |
| else (self.second_dir, self.first_dir)) |
| |
| lines = [f"{not_existing_dir}{file} does not exist."] |
| |
| if self.show_diff: |
| lines.append(f"Content of {existing_dir}{file}: \n") |
| lines.extend(get_file_contents(existing_dir + file)) |
| |
| self.write(lines) |
| |
| def write(self, lines: List[str]) -> None: |
| if self.out_dir == "": |
| pprint(lines) |
| else: |
| write_lines(self.out_dir, lines) |
| |
| ### |
| # Helper functions |
| ### |
| |
def sort_methods(lines: List[str]) -> List[str]:
    """Sort class methods in the file contents by alphabetical order

    Given lines of Java file contents, return lines with class methods sorted in alphabetical order.
    Also omit empty lines or lines with spaces.
    A class starts at the first line containing "class" and ends at the next
    line that begins with "}". If the input ends before such a closing line,
    the lines collected so far are still emitted (sorted) rather than dropped.
    For example:
        l = [
            "package android.test;",
            "",
            "public static final int ORANGE = 1;",
            "",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "}"
        ]
        sort_methods(l) returns
        [
            "package android.test;",
            "public static final int ORANGE = 1;",
            "public class TestClass {",
            "public TestClass() { throw new RuntimeException("Stub!"); }",
            "public void bar() { throw new RuntimeException("Stub!"); }",
            "public void foo() { throw new RuntimeException("Stub!"); }",
            "}"
        ]

    Args:
        lines: List of strings consisted of Java file contents.

    Returns:
        A list of string with sorted class methods.

    """
    def is_not_blank(l: str) -> bool:
        return bool(l) and not l.isspace()

    ret = []

    in_class = False
    # Non-blank lines of the class body currently being read.
    buffer = []
    for line in lines:
        if not in_class:
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Adding static variables, package info, etc.
                # Skipping empty or space lines.
                ret.append(line)
        elif line.startswith("}"):
            # End of class: emit the sorted body, then the closing line.
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # Input ended inside a class body (no closing line starting with "}"):
    # emit what was buffered instead of silently losing it.
    ret.extend(sorted(buffer))

    return ret
| |
def get_file_contents(file_path: str) -> List[str]:
    """Read a text file and return its lines with trailing newlines removed.

    Args:
        file_path: Path of the file to read.

    Returns:
        One string per line of the file, in file order. Only '\\n' characters
        at the end of each line are stripped; other whitespace is kept.
    """
    # The with-statement closes the file, including when reading fails.
    with open(file_path) as handle:
        return [raw.rstrip('\n') for raw in handle]
| |
def pprint(l: List[str]) -> None:
    """Print every entry of ``l`` on its own line to standard output.

    Args:
        l: The lines to print. Nothing is printed for an empty list.
    """
    entries = tuple(l)
    # One print call; the final newline is only emitted when there is output.
    print(*entries, sep="\n", end="\n" if entries else "")
| |
def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append lines to a file, followed by one blank separator line.

    Args:
        out_dir: Path of the file to append to; it is opened in append mode,
            so existing content is kept.
        lines: Strings to write; a newline is added after each one.
    """
    # The with-statement closes the file when the block exits.
    with open(out_dir, "a") as sink:
        for entry in lines:
            sink.write(entry + '\n')
        # Blank line separating this batch from the next one.
        sink.write("\n")
| |
def dir_exists(dir: str) -> bool:
    """Return whether the path ``dir`` exists on the filesystem.

    The check is ``Path.exists``, so it is also true for a path that points
    to a regular file rather than a directory.

    Args:
        dir: Path to check.
    """
    candidate = Path(dir)
    return candidate.exists()
| |
if __name__ == '__main__':
    # Command-line entry point: parse the two directories and the optional
    # flags, then run the comparison.
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', action='store', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', action='store', type=str,
                        help="second path to compare file directory and contents")
    parser.add_argument('--out', dest='out_dir',
                        action='store', default="", type=str,
                        help="optional directory to write log. If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the content of the file unique to each directories")
    # Implicit string concatenation instead of a backslash continuation, so
    # the help text does not depend on how the source line is indented.
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the contents common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[], help="optional words to skip in comparison")

    args = parser.parse_args()

    # Positional arguments are required by argparse, but an empty string
    # still gets through; treat that as a usage error.
    if not args.first_dir or not args.second_dir:
        parser.print_usage()
        # Non-zero status so that calling scripts can detect the failure.
        exit(1)

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()