diff options
author | 2017-07-24 18:19:36 -0700 | |
---|---|---|
committer | 2017-07-25 15:53:15 -0700 | |
commit | e967d3f6ac2e1e1f612f99b9c76abcb9e13bb7a2 (patch) | |
tree | 57a3a90478daeaf536af29463e3ac3866777511c | |
parent | 3370d3230d4e3084ca95eb6d6bb63f27c3cb6f63 (diff) |
AAPT2: Fix JavaDoc first sentence extraction.
The old algorithm for detecting the first sentence of a JavaDoc comment
looked for the first occurence of '.'. This does not work when code or a
{@link android.R.styleable} link is encountered in the first sentence.
Switch to checking for whitespace characters after the '.' character.
Bug: 62900335
Test: make aapt2_tests
Change-Id: I8238f6a6304c9c2f92e2e576ca8962a59c2b20ea
-rw-r--r-- | tools/aapt2/java/AnnotationProcessor.cpp | 19 | ||||
-rw-r--r-- | tools/aapt2/java/AnnotationProcessor.h | 2 | ||||
-rw-r--r-- | tools/aapt2/java/AnnotationProcessor_test.cpp | 24 | ||||
-rw-r--r-- | tools/aapt2/java/JavaClassGenerator.cpp | 20 | ||||
-rw-r--r-- | tools/aapt2/text/Unicode.cpp | 11 | ||||
-rw-r--r-- | tools/aapt2/text/Unicode.h | 4 | ||||
-rw-r--r-- | tools/aapt2/text/Utf8Iterator.cpp | 11 | ||||
-rw-r--r-- | tools/aapt2/text/Utf8Iterator.h | 5 | ||||
-rw-r--r-- | tools/aapt2/text/Utf8Iterator_test.cpp | 28 | ||||
-rw-r--r-- | tools/aapt2/tools/extract_unicode_properties.py | 34 |
10 files changed, 119 insertions, 39 deletions
diff --git a/tools/aapt2/java/AnnotationProcessor.cpp b/tools/aapt2/java/AnnotationProcessor.cpp index a0ef00b1ea1f..1f83fa098d74 100644 --- a/tools/aapt2/java/AnnotationProcessor.cpp +++ b/tools/aapt2/java/AnnotationProcessor.cpp @@ -18,12 +18,29 @@ #include <algorithm> +#include "text/Unicode.h" +#include "text/Utf8Iterator.h" #include "util/Util.h" -using android::StringPiece; +using ::aapt::text::Utf8Iterator; +using ::android::StringPiece; namespace aapt { +StringPiece AnnotationProcessor::ExtractFirstSentence(const StringPiece& comment) { + Utf8Iterator iter(comment); + while (iter.HasNext()) { + const char32_t codepoint = iter.Next(); + if (codepoint == U'.') { + const size_t current_position = iter.Position(); + if (!iter.HasNext() || text::IsWhitespace(iter.Next())) { + return comment.substr(0, current_position); + } + } + } + return comment; +} + void AnnotationProcessor::AppendCommentLine(std::string& comment) { static const std::string sDeprecated = "@deprecated"; static const std::string sSystemApi = "@SystemApi"; diff --git a/tools/aapt2/java/AnnotationProcessor.h b/tools/aapt2/java/AnnotationProcessor.h index 99cd44fd2cc1..a06eda0f9c5c 100644 --- a/tools/aapt2/java/AnnotationProcessor.h +++ b/tools/aapt2/java/AnnotationProcessor.h @@ -53,6 +53,8 @@ namespace aapt { */ class AnnotationProcessor { public: + static android::StringPiece ExtractFirstSentence(const android::StringPiece& comment); + /** * Adds more comments. Since resources can have various values with different * configurations, diff --git a/tools/aapt2/java/AnnotationProcessor_test.cpp b/tools/aapt2/java/AnnotationProcessor_test.cpp index 3e43c4295c07..9ccac8888426 100644 --- a/tools/aapt2/java/AnnotationProcessor_test.cpp +++ b/tools/aapt2/java/AnnotationProcessor_test.cpp @@ -18,6 +18,10 @@ #include "test/Test.h" +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Not; + namespace aapt { TEST(AnnotationProcessorTest, EmitsDeprecated) { @@ -33,7 +37,7 @@ TEST(AnnotationProcessorTest, EmitsDeprecated) { processor.WriteToStream(&result, ""); std::string annotations = result.str(); - EXPECT_NE(std::string::npos, annotations.find("@Deprecated")); + EXPECT_THAT(annotations, HasSubstr("@Deprecated")); } TEST(AnnotationProcessorTest, EmitsSystemApiAnnotationAndRemovesFromComment) { @@ -44,10 +48,20 @@ TEST(AnnotationProcessorTest, EmitsSystemApiAnnotationAndRemovesFromComment) { processor.WriteToStream(&result, ""); std::string annotations = result.str(); - EXPECT_NE(std::string::npos, - annotations.find("@android.annotation.SystemApi")); - EXPECT_EQ(std::string::npos, annotations.find("@SystemApi")); - EXPECT_NE(std::string::npos, annotations.find("This is a system API")); + EXPECT_THAT(annotations, HasSubstr("@android.annotation.SystemApi")); + EXPECT_THAT(annotations, Not(HasSubstr("@SystemApi"))); + EXPECT_THAT(annotations, HasSubstr("This is a system API")); +} + +TEST(AnnotationProcessor, ExtractsFirstSentence) { + EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence("This is the only sentence"), + Eq("This is the only sentence")); + EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence( + "This is the\n first sentence. This is the rest of the paragraph."), + Eq("This is the\n first sentence.")); + EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence( + "This is the first sentence with a {@link android.R.styleable.Theme}."), + Eq("This is the first sentence with a {@link android.R.styleable.Theme}.")); } } // namespace aapt diff --git a/tools/aapt2/java/JavaClassGenerator.cpp b/tools/aapt2/java/JavaClassGenerator.cpp index 2a23aa9e5372..44fa0f19a0e5 100644 --- a/tools/aapt2/java/JavaClassGenerator.cpp +++ b/tools/aapt2/java/JavaClassGenerator.cpp @@ -299,24 +299,16 @@ void JavaClassGenerator::ProcessStyleable(const ResourceNameRef& name, const Res } const ResourceName& attr_name = entry.attr_ref->name.value(); - styleable_comment << "<tr><td>"; - styleable_comment << "<code>{@link #" << entry.field_name << " " - << (!attr_name.package.empty() - ? attr_name.package - : context_->GetCompilationPackage()) - << ":" << attr_name.entry << "}</code>"; - styleable_comment << "</td>"; - - styleable_comment << "<td>"; + styleable_comment << "<tr><td><code>{@link #" << entry.field_name << " " + << (!attr_name.package.empty() ? attr_name.package + : context_->GetCompilationPackage()) + << ":" << attr_name.entry << "}</code></td>"; // Only use the comment up until the first '.'. This is to stay compatible with // the way old AAPT did it (presumably to keep it short and to avoid including // annotations like @hide which would affect this Styleable). - auto iter = std::find(attr_comment_line.begin(), attr_comment_line.end(), '.'); - if (iter != attr_comment_line.end()) { - attr_comment_line = attr_comment_line.substr(0, (iter - attr_comment_line.begin()) + 1); - } - styleable_comment << attr_comment_line << "</td></tr>\n"; + styleable_comment << "<td>" << AnnotationProcessor::ExtractFirstSentence(attr_comment_line) + << "</td></tr>\n"; } styleable_comment << "</table>\n"; diff --git a/tools/aapt2/text/Unicode.cpp b/tools/aapt2/text/Unicode.cpp index 38ec9c465ec2..75eeb46c7f5e 100644 --- a/tools/aapt2/text/Unicode.cpp +++ b/tools/aapt2/text/Unicode.cpp @@ -66,6 +66,17 @@ bool IsXidContinue(char32_t codepoint) { return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue; } +// Hardcode the White_Space characters since they are few and the external/icu project doesn't +// list them as data files to parse. +// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt +bool IsWhitespace(char32_t codepoint) { + return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) || + (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) || + (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) || + (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) || + (codepoint == 0x3000); +} + bool IsJavaIdentifier(const StringPiece& str) { Utf8Iterator iter(str); diff --git a/tools/aapt2/text/Unicode.h b/tools/aapt2/text/Unicode.h index 270718734cff..546714e9a290 100644 --- a/tools/aapt2/text/Unicode.h +++ b/tools/aapt2/text/Unicode.h @@ -40,6 +40,10 @@ bool IsXidStart(char32_t codepoint); // characters in the ID_Continue set. bool IsXidContinue(char32_t codepoint); +// Returns true if the Unicode codepoint has the White_Space property. +// http://unicode.org/reports/tr44/#White_Space +bool IsWhitespace(char32_t codepoint); + // Returns true if the UTF8 string can be used as a Java identifier. // NOTE: This does not check against the set of reserved Java keywords. bool IsJavaIdentifier(const android::StringPiece& str); diff --git a/tools/aapt2/text/Utf8Iterator.cpp b/tools/aapt2/text/Utf8Iterator.cpp index 0d43353ef39a..20b9073b9a26 100644 --- a/tools/aapt2/text/Utf8Iterator.cpp +++ b/tools/aapt2/text/Utf8Iterator.cpp @@ -25,18 +25,17 @@ namespace aapt { namespace text { Utf8Iterator::Utf8Iterator(const StringPiece& str) - : str_(str), next_pos_(0), current_codepoint_(0) { + : str_(str), current_pos_(0), next_pos_(0), current_codepoint_(0) { DoNext(); } void Utf8Iterator::DoNext() { - size_t next_pos = 0u; - int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), next_pos_, &next_pos); + current_pos_ = next_pos_; + int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), current_pos_, &next_pos_); if (result == -1) { current_codepoint_ = 0u; } else { current_codepoint_ = static_cast<char32_t>(result); - next_pos_ = next_pos; } } @@ -44,6 +43,10 @@ bool Utf8Iterator::HasNext() const { return current_codepoint_ != 0; } +size_t Utf8Iterator::Position() const { + return current_pos_; +} + void Utf8Iterator::Skip(int amount) { while (amount > 0 && HasNext()) { Next(); diff --git a/tools/aapt2/text/Utf8Iterator.h b/tools/aapt2/text/Utf8Iterator.h index 6923957a5a22..9318401876d1 100644 --- a/tools/aapt2/text/Utf8Iterator.h +++ b/tools/aapt2/text/Utf8Iterator.h @@ -29,6 +29,10 @@ class Utf8Iterator { bool HasNext() const; + // Returns the current position of the iterator in bytes of the source UTF8 string. + // This position is the start of the codepoint returned by the next call to Next(). + size_t Position() const; + void Skip(int amount); char32_t Next(); @@ -39,6 +43,7 @@ class Utf8Iterator { void DoNext(); android::StringPiece str_; + size_t current_pos_; size_t next_pos_; char32_t current_codepoint_; }; diff --git a/tools/aapt2/text/Utf8Iterator_test.cpp b/tools/aapt2/text/Utf8Iterator_test.cpp index f3111c081276..8c3e77446595 100644 --- a/tools/aapt2/text/Utf8Iterator_test.cpp +++ b/tools/aapt2/text/Utf8Iterator_test.cpp @@ -18,6 +18,7 @@ #include "test/Test.h" +using ::android::StringPiece; using ::testing::Eq; namespace aapt { @@ -63,5 +64,32 @@ TEST(Utf8IteratorTest, IteratesOverUnicode) { EXPECT_FALSE(iter.HasNext()); } +TEST(Utf8IteratorTest, PositionPointsToTheCorrectPlace) { + const StringPiece expected("Mm🍩"); + Utf8Iterator iter(expected); + + // Before any character, the position should be 0. + EXPECT_THAT(iter.Position(), Eq(0u)); + + // The 'M' character, one byte. + ASSERT_TRUE(iter.HasNext()); + iter.Next(); + EXPECT_THAT(iter.Position(), Eq(1u)); + + // The 'm' character, one byte. + ASSERT_TRUE(iter.HasNext()); + iter.Next(); + EXPECT_THAT(iter.Position(), Eq(2u)); + + // The doughnut character, 4 bytes. + ASSERT_TRUE(iter.HasNext()); + iter.Next(); + EXPECT_THAT(iter.Position(), Eq(6u)); + + // There should be nothing left. + EXPECT_FALSE(iter.HasNext()); + EXPECT_THAT(iter.Position(), Eq(expected.size())); +} + } // namespace text } // namespace aapt diff --git a/tools/aapt2/tools/extract_unicode_properties.py b/tools/aapt2/tools/extract_unicode_properties.py index d7e0479bb788..7577ec82aa86 100644 --- a/tools/aapt2/tools/extract_unicode_properties.py +++ b/tools/aapt2/tools/extract_unicode_properties.py @@ -35,9 +35,8 @@ class CharacterProperty: return "{}0x{:04x}, 0x{:04x}, {}{}".format( "{", self.first_char, self.last_char, ' | '.join(types), "}") -def extract_unicode_properties(f, props): - prog = re.compile(r"^(?P<first>\w{4})(..(?P<last>\w{4}))?\W+;\W+(?P<prop>\w+)\n$") - chars = {} +def extract_unicode_properties(f, props, chars_out): + prog = re.compile(r"^(?P<first>\w{4})(..(?P<last>\w{4}))?\W+;\W+(?P<prop>\w+)") for line in f: result = prog.match(line) if result: @@ -49,10 +48,12 @@ def extract_unicode_properties(f, props): last_char = (int(last_char_str, 16) if last_char_str else start_char) + 1 prop_type = props[prop_type_str] for char in range(start_char, last_char): - if char not in chars: - chars[char] = CharacterProperty(char, char, 0) - chars[char].prop_type |= prop_type + if char not in chars_out: + chars_out[char] = CharacterProperty(char, char, 0) + chars_out[char].prop_type |= prop_type + return chars_out +def flatten_unicode_properties(chars): result = [] for char_prop in sorted(chars.values(), key=CharacterProperty.key): if len(result) == 0: @@ -82,17 +83,20 @@ license = """/* """ if __name__ == "__main__": - if len(sys.argv) != 2: + if len(sys.argv) < 2: print("must specify path to icu DerivedCoreProperties file (e.g:" \ "external/icu/icu4c/source/data/unidata/DerivedCoreProperties.txt)") sys.exit(1) - with open(sys.argv[1]) as f: - props = {"XID_Start": 1, "XID_Continue": 2} - char_props = extract_unicode_properties(f, props) - print("{}\nconst static std::array<CharacterProperties, {}> sCharacterProperties = {}" - .format(license, len(char_props), "{{")) - for prop in char_props: - print(" {},".format(prop)) - print("}};") + props = {"XID_Start": 1, "XID_Continue": 2} + char_props = {} + for file_path in sys.argv[1:]: + with open(file_path) as f: + extract_unicode_properties(f, props, char_props) + result = flatten_unicode_properties(char_props) + print("{}\nconst static std::array<CharacterProperties, {}> sCharacterProperties = {}" + .format(license, len(result), "{{")) + for prop in result: + print(" {},".format(prop)) + print("}};") |