From 8049f3da712ea9c3154b57ce2276c97e749d1f2c Mon Sep 17 00:00:00 2001 From: Adam Lesinski Date: Fri, 31 Mar 2017 18:28:14 -0700 Subject: AAPT2: Fix pseudolocalization (again) Pseudolocalization didn't properly handle spans in strings like "Hello" wrapped in two identical nested tags (e.g. "<small><small>Hello</small></small>"). The spans would be identical and when doing range checks only one of them would be updated. Switched to a more robust way of extracting the relevant chunks of a styled string. This uses a stack, which is more in line with the real representation in XML. Bug: 34088357 Test: make aapt2_tests Change-Id: Ia4e4501713e688c96a89e26e4e2b1384f4cd3889 --- tools/aapt2/ResourceParser.cpp | 12 +- tools/aapt2/ResourceParser_test.cpp | 12 +- tools/aapt2/compile/PseudolocaleGenerator.cpp | 293 ++++++++++++--------- tools/aapt2/compile/PseudolocaleGenerator_test.cpp | 146 ++++++++-- 4 files changed, 311 insertions(+), 152 deletions(-) diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp index 8461905d8034..90f713b67985 100644 --- a/tools/aapt2/ResourceParser.cpp +++ b/tools/aapt2/ResourceParser.cpp @@ -155,7 +155,10 @@ bool ResourceParser::FlattenXmlSubtree( xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string, std::vector* out_untranslatable_sections) { // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply. - std::vector span_stack; + // The stack elements refer to the indices in out_style_string->spans. + // By first adding to the out_style_string->spans vector, and then using the stack to refer + // to this vector, the original order of tags is preserved in cases such as <b><i>hello</i></b>. + std::vector span_stack; // Clear the output variables. 
out_raw_string->clear(); @@ -192,7 +195,9 @@ bool ResourceParser::FlattenXmlSubtree( return false; } - span_stack.push_back(Span{std::move(span_name), static_cast(builder.Utf16Len())}); + out_style_string->spans.push_back( + Span{std::move(span_name), static_cast(builder.Utf16Len())}); + span_stack.push_back(out_style_string->spans.size() - 1); } else if (parser->element_namespace() == sXliffNamespaceUri) { if (parser->element_name() == "g") { if (untranslatable_start_depth) { @@ -233,9 +238,8 @@ bool ResourceParser::FlattenXmlSubtree( if (parser->element_namespace().empty()) { // This is an HTML tag which we encode as a span. Update the span // stack and pop the top entry. - Span& top_span = span_stack.back(); + Span& top_span = out_style_string->spans[span_stack.back()]; top_span.last_char = builder.Utf16Len() - 1; - out_style_string->spans.push_back(std::move(top_span)); span_stack.pop_back(); } else if (untranslatable_start_depth == make_value(depth)) { // This is the end of an untranslatable section. Use UTF8 indices/lengths. diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp index eefa320a4418..8062c2e6afea 100644 --- a/tools/aapt2/ResourceParser_test.cpp +++ b/tools/aapt2/ResourceParser_test.cpp @@ -101,20 +101,24 @@ TEST_F(ResourceParserTest, ParseStyledString) { // Use a surrogate pair unicode point so that we can verify that the span // indices use UTF-16 length and not UTF-8 length. 
std::string input = - "This is my aunt\u2019s string"; + "This is my aunt\u2019s fickle string"; ASSERT_TRUE(TestParse(input)); StyledString* str = test::GetValue(&table_, "string/foo"); ASSERT_NE(nullptr, str); - const std::string expected_str = "This is my aunt\u2019s string"; + const std::string expected_str = "This is my aunt\u2019s fickle string"; EXPECT_EQ(expected_str, *str->value->str); - EXPECT_EQ(1u, str->value->spans.size()); + EXPECT_EQ(2u, str->value->spans.size()); EXPECT_TRUE(str->untranslatable_sections.empty()); EXPECT_EQ(std::string("b"), *str->value->spans[0].name); EXPECT_EQ(17u, str->value->spans[0].first_char); - EXPECT_EQ(23u, str->value->spans[0].last_char); + EXPECT_EQ(30u, str->value->spans[0].last_char); + + EXPECT_EQ(std::string("small"), *str->value->spans[1].name); + EXPECT_EQ(24u, str->value->spans[1].first_char); + EXPECT_EQ(30u, str->value->spans[1].last_char); } TEST_F(ResourceParserTest, ParseStringWithWhitespace) { diff --git a/tools/aapt2/compile/PseudolocaleGenerator.cpp b/tools/aapt2/compile/PseudolocaleGenerator.cpp index fad9edd04e4c..a031ea4c31ec 100644 --- a/tools/aapt2/compile/PseudolocaleGenerator.cpp +++ b/tools/aapt2/compile/PseudolocaleGenerator.cpp @@ -22,136 +22,194 @@ #include "ResourceValues.h" #include "ValueVisitor.h" #include "compile/Pseudolocalizer.h" +#include "util/Util.h" using android::StringPiece; +using android::StringPiece16; namespace aapt { -std::unique_ptr PseudolocalizeStyledString( - StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { - Pseudolocalizer localizer(method); +// The struct that represents both Span objects and UntranslatableSections. +struct UnifiedSpan { + // Only present for Span objects. If not present, this was an UntranslatableSection. + Maybe tag; - const StringPiece original_text = *string->value->str; + // The UTF-16 index into the string where this span starts. 
+ uint32_t first_char; - StyleString localized; + // The UTF-16 index into the string where this span ends, inclusive. + uint32_t last_char; +}; - // Copy the spans. We will update their offsets when we localize. - localized.spans.reserve(string->value->spans.size()); - for (const StringPool::Span& span : string->value->spans) { - localized.spans.push_back( - Span{*span.name, span.first_char, span.last_char}); +inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { + if (left.first_char < right.first_char) { + return true; + } else if (left.first_char > right.first_char) { + return false; + } else if (left.last_char < right.last_char) { + return true; } + return false; +} - // The ranges are all represented with a single value. This is the start of - // one range and end of another. - struct Range { - size_t start; - - // If set to true, toggles the state of translatability. - bool toggle_translatability; - - // Once the new string is localized, these are the pointers to the spans to adjust. - // Since this struct represents the start of one range and end of another, - // we have the two pointers respectively. - uint32_t* update_start; - uint32_t* update_end; - }; - - auto cmp = [](const Range& r, size_t index) -> bool { - return r.start < index; - }; - - // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7] - // The ranges are the spaces in between. In this example, with a total string - // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9] - // - std::vector ranges; - ranges.push_back(Range{0, false, nullptr, nullptr}); - ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr}); - for (size_t i = 0; i < string->value->spans.size(); i++) { - const StringPool::Span& span = string->value->spans[i]; - - // Insert or update the Range marker for the start of this span. 
- auto iter = - std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp); - if (iter != ranges.end() && iter->start == span.first_char) { - iter->update_start = &localized.spans[i].first_char; - } else { - ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr}); - } +inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { + return UnifiedSpan{*span.name, span.first_char, span.last_char}; +} + +inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { + return UnifiedSpan{ + {}, static_cast(section.start), static_cast(section.end) - 1}; +} - // Insert or update the Range marker for the end of this span. - iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp); - if (iter != ranges.end() && iter->start == span.last_char) { - iter->update_end = &localized.spans[i].last_char; +// Merges the Span and UntranslatableSections of this StyledString into a single vector of +// UnifiedSpans. This will first check that the Spans are sorted in ascending order. +static std::vector MergeSpans(const StyledString& string) { + // Ensure the Spans are sorted and converted. + std::vector sorted_spans; + sorted_spans.reserve(string.value->spans.size()); + std::transform(string.value->spans.begin(), string.value->spans.end(), + std::back_inserter(sorted_spans), SpanToUnifiedSpan); + + // Stable sort to ensure tag sequences like "" are preserved. + std::stable_sort(sorted_spans.begin(), sorted_spans.end()); + + // Ensure the UntranslatableSections are sorted and converted. 
+ std::vector sorted_untranslatable_sections; + sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); + std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), + std::back_inserter(sorted_untranslatable_sections), + UntranslatableSectionToUnifiedSpan); + std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); + + std::vector merged_spans; + merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); + auto span_iter = sorted_spans.begin(); + auto untranslatable_iter = sorted_untranslatable_sections.begin(); + while (span_iter != sorted_spans.end() && + untranslatable_iter != sorted_untranslatable_sections.end()) { + if (*span_iter < *untranslatable_iter) { + merged_spans.push_back(std::move(*span_iter)); + ++span_iter; } else { - ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char}); + merged_spans.push_back(std::move(*untranslatable_iter)); + ++untranslatable_iter; } } - // Parts of the string may be untranslatable. Merge those ranges - // in as well, so that we have continuous sections of text to - // feed into the pseudolocalizer. - // We do this by marking the beginning of a range as either toggling - // the translatability state or not. - for (const UntranslatableSection& section : string->untranslatable_sections) { - auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp); - if (iter != ranges.end() && iter->start == section.start) { - // An existing span starts (or ends) here. We just need to mark that - // the translatability should toggle here. If translatability was - // already being toggled, then that means we have two adjacent ranges of untranslatable - // text, so remove the toggle and only toggle at the end of this range, - // effectively merging these ranges. 
- iter->toggle_translatability = !iter->toggle_translatability; - } else { - // Insert a new range that specifies to toggle the translatability. - iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr}); - } + while (span_iter != sorted_spans.end()) { + merged_spans.push_back(std::move(*span_iter)); + ++span_iter; + } - // Update/create an end to the untranslatable section. - iter = std::lower_bound(iter, ranges.end(), section.end, cmp); - if (iter != ranges.end() && iter->start == section.end) { - iter->toggle_translatability = true; - } else { - iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr}); - } + while (untranslatable_iter != sorted_untranslatable_sections.end()) { + merged_spans.push_back(std::move(*untranslatable_iter)); + ++untranslatable_iter; } + return merged_spans; +} - localized.str += localizer.Start(); +std::unique_ptr PseudolocalizeStyledString(StyledString* string, + Pseudolocalizer::Method method, + StringPool* pool) { + Pseudolocalizer localizer(method); - // Iterate over the ranges and localize each section. - // The text starts as translatable, and each time a range has toggle_translatability - // set to true, we toggle whether to translate or not. - // This assumes no untranslatable ranges overlap. - bool translatable = true; - for (size_t i = 0; i < ranges.size(); i++) { - const size_t start = ranges[i].start; - size_t len = original_text.size() - start; - if (i + 1 < ranges.size()) { - len = ranges[i + 1].start - start; - } + // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. + // This will effectively subdivide the string into multiple sections that can be individually + // pseudolocalized, while keeping the span indices synchronized. 
+ std::vector merged_spans = MergeSpans(*string); - if (ranges[i].update_start) { - *ranges[i].update_start = localized.str.size(); - } + // All Span indices are UTF-16 based, according to the resources.arsc format expected by the + // runtime. So we will do all our processing in UTF-16, then convert back. + const std::u16string text16 = util::Utf8ToUtf16(*string->value->str); - if (ranges[i].update_end) { - *ranges[i].update_end = localized.str.size(); - } + // Convenient wrapper around the text that allows us to work with StringPieces. + const StringPiece16 text(text16); + + // The new string. + std::string new_string = localizer.Start(); + + // The stack that keeps track of what nested Span we're in. + std::vector span_stack; + + // The current position in the original text. + uint32_t cursor = 0u; + + // The current position in the new text. + uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast(new_string.data()), + new_string.size(), false); - if (ranges[i].toggle_translatability) { - translatable = !translatable; + // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. + bool translatable = true; + size_t span_idx = 0u; + while (span_idx < merged_spans.size() || !span_stack.empty()) { + UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; + UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; + + if (span != nullptr) { + if (parent_span == nullptr || parent_span->last_char > span->first_char) { + // There is no parent, or this span is the child of the parent. + // Pseudolocalize all the text until this span. + const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); + cursor += substr.size(); + + // Pseudolocalize the substring. 
+ std::string new_substr = util::Utf16ToUtf8(substr); + if (translatable) { + new_substr = localizer.Text(new_substr); + } + new_cursor += utf8_to_utf16_length(reinterpret_cast(new_substr.data()), + new_substr.size(), false); + new_string += new_substr; + + // Rewrite the first_char. + span->first_char = new_cursor; + if (!span->tag) { + // An untranslatable section has begun! + translatable = false; + } + span_stack.push_back(span_idx); + ++span_idx; + continue; + } } - if (translatable) { - localized.str += localizer.Text(original_text.substr(start, len)); - } else { - localized.str += original_text.substr(start, len); + if (parent_span != nullptr) { + // There is a parent, and either this span is not a child of it, or there are no more spans. + // Pop this off the stack. + const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); + cursor += substr.size(); + + // Pseudolocalize the substring. + std::string new_substr = util::Utf16ToUtf8(substr); + if (translatable) { + new_substr = localizer.Text(new_substr); + } + new_cursor += utf8_to_utf16_length(reinterpret_cast(new_substr.data()), + new_substr.size(), false); + new_string += new_substr; + + parent_span->last_char = new_cursor - 1; + if (parent_span->tag) { + // An end to an untranslatable section. + translatable = true; + } + span_stack.pop_back(); } } - localized.str += localizer.End(); + // Finish the pseudolocalization at the end of the string. + new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); + new_string += localizer.End(); + + StyleString localized; + localized.str = std::move(new_string); + // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. 
+ for (UnifiedSpan& span : merged_spans) { + if (span.tag) { + localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); + } + } return util::make_unique(pool->MakeRef(localized)); } @@ -175,8 +233,7 @@ class Visitor : public RawValueVisitor { if (sub_visitor.value) { localized->values[i] = std::move(sub_visitor.item); } else { - localized->values[i] = - std::unique_ptr(plural->values[i]->Clone(pool_)); + localized->values[i] = std::unique_ptr(plural->values[i]->Clone(pool_)); } } } @@ -210,8 +267,7 @@ class Visitor : public RawValueVisitor { } result += localizer_.End(); - std::unique_ptr localized = - util::make_unique(pool_->MakeRef(result)); + std::unique_ptr localized = util::make_unique(pool_->MakeRef(result)); localized->SetSource(string->GetSource()); localized->SetWeak(true); item = std::move(localized); @@ -282,14 +338,10 @@ void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method, } } -/** - * A value is pseudolocalizable if it does not define a locale (or is the - * default locale) - * and is translatable. - */ +// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is +// translatable. 
static bool IsPseudolocalizable(ResourceConfigValue* config_value) { - const int diff = - config_value->config.diff(ConfigDescription::DefaultConfig()); + const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); if (diff & ConfigDescription::CONFIG_LOCALE) { return false; } @@ -298,19 +350,16 @@ static bool IsPseudolocalizable(ResourceConfigValue* config_value) { } // namespace -bool PseudolocaleGenerator::Consume(IAaptContext* context, - ResourceTable* table) { +bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { for (auto& package : table->packages) { for (auto& type : package->types) { for (auto& entry : type->entries) { - std::vector values = - entry->FindValuesIf(IsPseudolocalizable); - + std::vector values = entry->FindValuesIf(IsPseudolocalizable); for (ResourceConfigValue* value : values) { - PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, - &table->string_pool, entry.get()); - PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, - &table->string_pool, entry.get()); + PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, + entry.get()); + PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, + entry.get()); } } } diff --git a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp index 4db37db55eb7..b08e1dab35a9 100644 --- a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp +++ b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp @@ -25,7 +25,7 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) { StringPool pool; StyleString original_style; original_style.str = "Hello world!"; - original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}}; + original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}}; std::unique_ptr new_string = PseudolocalizeStyledString( util::make_unique(pool.MakeRef(original_style)).get(), @@ -34,22 +34,19 
@@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) { EXPECT_EQ(original_style.str, *new_string->value->str); ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size()); - EXPECT_EQ(std::string("He").size(), new_string->value->spans[0].first_char); - EXPECT_EQ(std::string("Hel").size(), new_string->value->spans[0].last_char); - EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); + EXPECT_EQ(std::string("i"), *new_string->value->spans[0].name); + EXPECT_EQ(std::u16string(u"H").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"Hello worl").size(), new_string->value->spans[0].last_char); - EXPECT_EQ(std::string("Hello ").size(), - new_string->value->spans[1].first_char); - EXPECT_EQ(std::string("Hello w").size(), - new_string->value->spans[1].last_char); EXPECT_EQ(std::string("b"), *new_string->value->spans[1].name); + EXPECT_EQ(std::u16string(u"He").size(), new_string->value->spans[1].first_char); + EXPECT_EQ(std::u16string(u"Hel").size(), new_string->value->spans[1].last_char); - EXPECT_EQ(std::string("H").size(), new_string->value->spans[2].first_char); - EXPECT_EQ(std::string("Hello worl").size(), - new_string->value->spans[2].last_char); - EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name); + EXPECT_EQ(std::string("b"), *new_string->value->spans[2].name); + EXPECT_EQ(std::u16string(u"Hello ").size(), new_string->value->spans[2].first_char); + EXPECT_EQ(std::u16string(u"Hello w").size(), new_string->value->spans[2].last_char); - original_style.spans.push_back(Span{"em", 0, 11u}); + original_style.spans.insert(original_style.spans.begin(), Span{"em", 0, 11u}); new_string = PseudolocalizeStyledString( util::make_unique(pool.MakeRef(original_style)).get(), @@ -58,23 +55,128 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) { EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð¡ one two]"), *new_string->value->str); ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size()); - 
EXPECT_EQ(std::string("[Ĥé").size(), new_string->value->spans[0].first_char); - EXPECT_EQ(std::string("[Ĥéļ").size(), new_string->value->spans[0].last_char); + EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļð").size(), new_string->value->spans[0].last_char); + + EXPECT_EQ(std::u16string(u"[Ĥ").size(), new_string->value->spans[1].first_char); + EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļ").size(), new_string->value->spans[1].last_char); + + EXPECT_EQ(std::u16string(u"[Ĥé").size(), new_string->value->spans[2].first_char); + EXPECT_EQ(std::u16string(u"[Ĥéļ").size(), new_string->value->spans[2].last_char); + + EXPECT_EQ(std::u16string(u"[Ĥéļļö ").size(), new_string->value->spans[3].first_char); + EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵ").size(), new_string->value->spans[3].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentNestedTags) { + StringPool pool; + StyleString original_style; + original_style.str = "bold"; + original_style.spans = {Span{"b", 0, 3}, Span{"i", 0, 3}}; + + std::unique_ptr new_string = PseudolocalizeStyledString( + util::make_unique(pool.MakeRef(original_style)).get(), + Pseudolocalizer::Method::kAccent, &pool); + ASSERT_NE(nullptr, new_string); + ASSERT_EQ(2u, new_string->value->spans.size()); + EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str); + + EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); + EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[0].last_char); + + EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); + EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[1].first_char); + EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentTagsUnsorted) { + StringPool pool; + StyleString original_style; + original_style.str = 
"bold"; + original_style.spans = {Span{"i", 2, 3}, Span{"b", 0, 1}}; + + std::unique_ptr new_string = PseudolocalizeStyledString( + util::make_unique(pool.MakeRef(original_style)).get(), + Pseudolocalizer::Method::kAccent, &pool); + ASSERT_NE(nullptr, new_string); + ASSERT_EQ(2u, new_string->value->spans.size()); + EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str); + + EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); + EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"[ɓ").size(), new_string->value->spans[0].last_char); + + EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); + EXPECT_EQ(std::u16string(u"[ɓö").size(), new_string->value->spans[1].first_char); + EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeNestedAndAdjacentTags) { + StringPool pool; + StyleString original_style; + original_style.str = "This sentence is not what you think it is at all."; + original_style.spans = {Span{"b", 16u, 19u}, Span{"em", 29u, 47u}, Span{"i", 38u, 40u}, + Span{"b", 44u, 47u}}; + + std::unique_ptr new_string = PseudolocalizeStyledString( + util::make_unique(pool.MakeRef(original_style)).get(), + Pseudolocalizer::Method::kAccent, &pool); + ASSERT_NE(nullptr, new_string); + ASSERT_EQ(4u, new_string->value->spans.size()); + EXPECT_EQ(std::string( + "[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļļ. 
one two three four five six]"), + *new_string->value->str); + + EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñö").size(), new_string->value->spans[0].last_char); - EXPECT_EQ(std::string("[Ĥéļļö ").size(), + EXPECT_EQ(std::string("em"), *new_string->value->spans[1].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû").size(), new_string->value->spans[1].first_char); - EXPECT_EQ(std::string("[Ĥéļļö ŵ").size(), + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(), new_string->value->spans[1].last_char); - EXPECT_EQ(std::string("[Ĥ").size(), new_string->value->spans[2].first_char); - EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļ").size(), + EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ").size(), + new_string->value->spans[2].first_char); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ î").size(), new_string->value->spans[2].last_char); - EXPECT_EQ(std::string("[").size(), new_string->value->spans[3].first_char); - EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð").size(), + EXPECT_EQ(std::string("b"), *new_string->value->spans[3].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ").size(), + new_string->value->spans[3].first_char); + EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(), new_string->value->spans[3].last_char); } +TEST(PseudolocaleGeneratorTest, PseudolocalizePartsOfString) { + StringPool pool; + StyleString original_style; + original_style.str = "This should NOT be pseudolocalized."; + original_style.spans = {Span{"em", 4u, 14u}, Span{"i", 18u, 33u}}; + std::unique_ptr original_string = + util::make_unique(pool.MakeRef(original_style)); + original_string->untranslatable_sections = 
{UntranslatableSection{11u, 15u}}; + + std::unique_ptr new_string = + PseudolocalizeStyledString(original_string.get(), Pseudolocalizer::Method::kAccent, &pool); + ASSERT_NE(nullptr, new_string); + ASSERT_EQ(2u, new_string->value->spans.size()); + EXPECT_EQ(std::string("[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžéð. one two three four]"), + *new_string->value->str); + + EXPECT_EQ(std::string("em"), *new_string->value->spans[0].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš").size(), new_string->value->spans[0].first_char); + EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NO").size(), new_string->value->spans[0].last_char); + + EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); + EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé").size(), new_string->value->spans[1].first_char); + EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžé").size(), + new_string->value->spans[1].last_char); +} + TEST(PseudolocaleGeneratorTest, PseudolocalizeOnlyDefaultConfigs) { std::unique_ptr table = test::ResourceTableBuilder() @@ -138,7 +240,7 @@ TEST(PseudolocaleGeneratorTest, RespectUntranslateableSections) { { StyleString original_style; original_style.str = "Hello world!"; - original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}}; + original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}}; auto styled_string = util::make_unique(table->string_pool.MakeRef(original_style)); -- cgit v1.2.3-59-g8ed1b