diff options
| author | 2017-03-31 18:28:14 -0700 | |
|---|---|---|
| committer | 2017-03-31 18:30:20 -0700 | |
| commit | 8049f3da712ea9c3154b57ce2276c97e749d1f2c (patch) | |
| tree | cd4b22971ad876eb8421a7a5870da9e49d62c9f1 /tools | |
| parent | 881a623e3522e3e4f0905b606ca5c77cf899b21c (diff) | |
AAPT2: Fix pseudolocalization (again)
Pseudolocalization didn't properly handle spans in
strings like "<small><small>Hello</small></small>".
The two spans would be identical, so when doing range checks
only one of them would be updated.
Switched to a more robust way of extracting the relevant
chunks of a styled string. This uses a stack, which is more
in line with the real representation in XML.
Bug: 34088357
Test: make aapt2_tests
Change-Id: Ia4e4501713e688c96a89e26e4e2b1384f4cd3889
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/aapt2/ResourceParser.cpp | 12 | ||||
| -rw-r--r-- | tools/aapt2/ResourceParser_test.cpp | 12 | ||||
| -rw-r--r-- | tools/aapt2/compile/PseudolocaleGenerator.cpp | 293 | ||||
| -rw-r--r-- | tools/aapt2/compile/PseudolocaleGenerator_test.cpp | 146 | 
4 files changed, 311 insertions, 152 deletions
| diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp index 8461905d8034..90f713b67985 100644 --- a/tools/aapt2/ResourceParser.cpp +++ b/tools/aapt2/ResourceParser.cpp @@ -155,7 +155,10 @@ bool ResourceParser::FlattenXmlSubtree(      xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,      std::vector<UntranslatableSection>* out_untranslatable_sections) {    // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply. -  std::vector<Span> span_stack; +  // The stack elements refer to the indices in out_style_string->spans. +  // By first adding to the out_style_string->spans vector, and then using the stack to refer +  // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>. +  std::vector<size_t> span_stack;    // Clear the output variables.    out_raw_string->clear(); @@ -192,7 +195,9 @@ bool ResourceParser::FlattenXmlSubtree(            return false;          } -        span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); +        out_style_string->spans.push_back( +            Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); +        span_stack.push_back(out_style_string->spans.size() - 1);        } else if (parser->element_namespace() == sXliffNamespaceUri) {          if (parser->element_name() == "g") {            if (untranslatable_start_depth) { @@ -233,9 +238,8 @@ bool ResourceParser::FlattenXmlSubtree(        if (parser->element_namespace().empty()) {          // This is an HTML tag which we encode as a span. Update the span          // stack and pop the top entry. 
-        Span& top_span = span_stack.back(); +        Span& top_span = out_style_string->spans[span_stack.back()];          top_span.last_char = builder.Utf16Len() - 1; -        out_style_string->spans.push_back(std::move(top_span));          span_stack.pop_back();        } else if (untranslatable_start_depth == make_value(depth)) {          // This is the end of an untranslatable section. Use UTF8 indices/lengths. diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp index eefa320a4418..8062c2e6afea 100644 --- a/tools/aapt2/ResourceParser_test.cpp +++ b/tools/aapt2/ResourceParser_test.cpp @@ -101,20 +101,24 @@ TEST_F(ResourceParserTest, ParseStyledString) {    // Use a surrogate pair unicode point so that we can verify that the span    // indices use UTF-16 length and not UTF-8 length.    std::string input = -      "<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>"; +      "<string name=\"foo\">This is my aunt\u2019s <b>fickle <small>string</small></b></string>";    ASSERT_TRUE(TestParse(input));    StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");    ASSERT_NE(nullptr, str); -  const std::string expected_str = "This is my aunt\u2019s string"; +  const std::string expected_str = "This is my aunt\u2019s fickle string";    EXPECT_EQ(expected_str, *str->value->str); -  EXPECT_EQ(1u, str->value->spans.size()); +  EXPECT_EQ(2u, str->value->spans.size());    EXPECT_TRUE(str->untranslatable_sections.empty());    EXPECT_EQ(std::string("b"), *str->value->spans[0].name);    EXPECT_EQ(17u, str->value->spans[0].first_char); -  EXPECT_EQ(23u, str->value->spans[0].last_char); +  EXPECT_EQ(30u, str->value->spans[0].last_char); + +  EXPECT_EQ(std::string("small"), *str->value->spans[1].name); +  EXPECT_EQ(24u, str->value->spans[1].first_char); +  EXPECT_EQ(30u, str->value->spans[1].last_char);  }  TEST_F(ResourceParserTest, ParseStringWithWhitespace) { diff --git 
a/tools/aapt2/compile/PseudolocaleGenerator.cpp b/tools/aapt2/compile/PseudolocaleGenerator.cpp index fad9edd04e4c..a031ea4c31ec 100644 --- a/tools/aapt2/compile/PseudolocaleGenerator.cpp +++ b/tools/aapt2/compile/PseudolocaleGenerator.cpp @@ -22,136 +22,194 @@  #include "ResourceValues.h"  #include "ValueVisitor.h"  #include "compile/Pseudolocalizer.h" +#include "util/Util.h"  using android::StringPiece; +using android::StringPiece16;  namespace aapt { -std::unique_ptr<StyledString> PseudolocalizeStyledString( -    StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { -  Pseudolocalizer localizer(method); +// The struct that represents both Span objects and UntranslatableSections. +struct UnifiedSpan { +  // Only present for Span objects. If not present, this was an UntranslatableSection. +  Maybe<std::string> tag; -  const StringPiece original_text = *string->value->str; +  // The UTF-16 index into the string where this span starts. +  uint32_t first_char; -  StyleString localized; +  // The UTF-16 index into the string where this span ends, inclusive. +  uint32_t last_char; +}; -  // Copy the spans. We will update their offsets when we localize. -  localized.spans.reserve(string->value->spans.size()); -  for (const StringPool::Span& span : string->value->spans) { -    localized.spans.push_back( -        Span{*span.name, span.first_char, span.last_char}); +inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { +  if (left.first_char < right.first_char) { +    return true; +  } else if (left.first_char > right.first_char) { +    return false; +  } else if (left.last_char < right.last_char) { +    return true;    } +  return false; +} -  // The ranges are all represented with a single value. This is the start of -  // one range and end of another. -  struct Range { -    size_t start; - -    // If set to true, toggles the state of translatability. 
-    bool toggle_translatability; - -    // Once the new string is localized, these are the pointers to the spans to adjust. -    // Since this struct represents the start of one range and end of another, -    // we have the two pointers respectively. -    uint32_t* update_start; -    uint32_t* update_end; -  }; - -  auto cmp = [](const Range& r, size_t index) -> bool { -    return r.start < index; -  }; - -  // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7] -  // The ranges are the spaces in between. In this example, with a total string -  // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9] -  // -  std::vector<Range> ranges; -  ranges.push_back(Range{0, false, nullptr, nullptr}); -  ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr}); -  for (size_t i = 0; i < string->value->spans.size(); i++) { -    const StringPool::Span& span = string->value->spans[i]; - -    // Insert or update the Range marker for the start of this span. -    auto iter = -        std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp); -    if (iter != ranges.end() && iter->start == span.first_char) { -      iter->update_start = &localized.spans[i].first_char; -    } else { -      ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr}); -    } +inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { +  return UnifiedSpan{*span.name, span.first_char, span.last_char}; +} + +inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { +  return UnifiedSpan{ +      {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1}; +} -    // Insert or update the Range marker for the end of this span. 
-    iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp); -    if (iter != ranges.end() && iter->start == span.last_char) { -      iter->update_end = &localized.spans[i].last_char; +// Merges the Span and UntranslatableSections of this StyledString into a single vector of +// UnifiedSpans. This will first check that the Spans are sorted in ascending order. +static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) { +  // Ensure the Spans are sorted and converted. +  std::vector<UnifiedSpan> sorted_spans; +  sorted_spans.reserve(string.value->spans.size()); +  std::transform(string.value->spans.begin(), string.value->spans.end(), +                 std::back_inserter(sorted_spans), SpanToUnifiedSpan); + +  // Stable sort to ensure tag sequences like "<b><i>" are preserved. +  std::stable_sort(sorted_spans.begin(), sorted_spans.end()); + +  // Ensure the UntranslatableSections are sorted and converted. +  std::vector<UnifiedSpan> sorted_untranslatable_sections; +  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); +  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), +                 std::back_inserter(sorted_untranslatable_sections), +                 UntranslatableSectionToUnifiedSpan); +  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); + +  std::vector<UnifiedSpan> merged_spans; +  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); +  auto span_iter = sorted_spans.begin(); +  auto untranslatable_iter = sorted_untranslatable_sections.begin(); +  while (span_iter != sorted_spans.end() && +         untranslatable_iter != sorted_untranslatable_sections.end()) { +    if (*span_iter < *untranslatable_iter) { +      merged_spans.push_back(std::move(*span_iter)); +      ++span_iter;      } else { -      ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char}); +  
    merged_spans.push_back(std::move(*untranslatable_iter)); +      ++untranslatable_iter;      }    } -  // Parts of the string may be untranslatable. Merge those ranges -  // in as well, so that we have continuous sections of text to -  // feed into the pseudolocalizer. -  // We do this by marking the beginning of a range as either toggling -  // the translatability state or not. -  for (const UntranslatableSection& section : string->untranslatable_sections) { -    auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp); -    if (iter != ranges.end() && iter->start == section.start) { -      // An existing span starts (or ends) here. We just need to mark that -      // the translatability should toggle here. If translatability was -      // already being toggled, then that means we have two adjacent ranges of untranslatable -      // text, so remove the toggle and only toggle at the end of this range, -      // effectively merging these ranges. -      iter->toggle_translatability = !iter->toggle_translatability; -    } else { -      // Insert a new range that specifies to toggle the translatability. -      iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr}); -    } +  while (span_iter != sorted_spans.end()) { +    merged_spans.push_back(std::move(*span_iter)); +    ++span_iter; +  } -    // Update/create an end to the untranslatable section. 
-    iter = std::lower_bound(iter, ranges.end(), section.end, cmp); -    if (iter != ranges.end() && iter->start == section.end) { -      iter->toggle_translatability = true; -    } else { -      iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr}); -    } +  while (untranslatable_iter != sorted_untranslatable_sections.end()) { +    merged_spans.push_back(std::move(*untranslatable_iter)); +    ++untranslatable_iter;    } +  return merged_spans; +} -  localized.str += localizer.Start(); +std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string, +                                                         Pseudolocalizer::Method method, +                                                         StringPool* pool) { +  Pseudolocalizer localizer(method); -  // Iterate over the ranges and localize each section. -  // The text starts as translatable, and each time a range has toggle_translatability -  // set to true, we toggle whether to translate or not. -  // This assumes no untranslatable ranges overlap. -  bool translatable = true; -  for (size_t i = 0; i < ranges.size(); i++) { -    const size_t start = ranges[i].start; -    size_t len = original_text.size() - start; -    if (i + 1 < ranges.size()) { -      len = ranges[i + 1].start - start; -    } +  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. +  // This will effectively subdivide the string into multiple sections that can be individually +  // pseudolocalized, while keeping the span indices synchronized. +  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string); -    if (ranges[i].update_start) { -      *ranges[i].update_start = localized.str.size(); -    } +  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the +  // runtime. So we will do all our processing in UTF-16, then convert back. 
+  const std::u16string text16 = util::Utf8ToUtf16(*string->value->str); -    if (ranges[i].update_end) { -      *ranges[i].update_end = localized.str.size(); -    } +  // Convenient wrapper around the text that allows us to work with StringPieces. +  const StringPiece16 text(text16); + +  // The new string. +  std::string new_string = localizer.Start(); + +  // The stack that keeps track of what nested Span we're in. +  std::vector<size_t> span_stack; + +  // The current position in the original text. +  uint32_t cursor = 0u; + +  // The current position in the new text. +  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()), +                                             new_string.size(), false); -    if (ranges[i].toggle_translatability) { -      translatable = !translatable; +  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. +  bool translatable = true; +  size_t span_idx = 0u; +  while (span_idx < merged_spans.size() || !span_stack.empty()) { +    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; +    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; + +    if (span != nullptr) { +      if (parent_span == nullptr || parent_span->last_char > span->first_char) { +        // There is no parent, or this span is the child of the parent. +        // Pseudolocalize all the text until this span. +        const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); +        cursor += substr.size(); + +        // Pseudolocalize the substring. 
+        std::string new_substr = util::Utf16ToUtf8(substr); +        if (translatable) { +          new_substr = localizer.Text(new_substr); +        } +        new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), +                                           new_substr.size(), false); +        new_string += new_substr; + +        // Rewrite the first_char. +        span->first_char = new_cursor; +        if (!span->tag) { +          // An untranslatable section has begun! +          translatable = false; +        } +        span_stack.push_back(span_idx); +        ++span_idx; +        continue; +      }      } -    if (translatable) { -      localized.str += localizer.Text(original_text.substr(start, len)); -    } else { -      localized.str += original_text.substr(start, len); +    if (parent_span != nullptr) { +      // There is a parent, and either this span is not a child of it, or there are no more spans. +      // Pop this off the stack. +      const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); +      cursor += substr.size(); + +      // Pseudolocalize the substring. +      std::string new_substr = util::Utf16ToUtf8(substr); +      if (translatable) { +        new_substr = localizer.Text(new_substr); +      } +      new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), +                                         new_substr.size(), false); +      new_string += new_substr; + +      parent_span->last_char = new_cursor - 1; +      if (parent_span->tag) { +        // An end to an untranslatable section. +        translatable = true; +      } +      span_stack.pop_back();      }    } -  localized.str += localizer.End(); +  // Finish the pseudolocalization at the end of the string. 
+  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); +  new_string += localizer.End(); + +  StyleString localized; +  localized.str = std::move(new_string); +  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. +  for (UnifiedSpan& span : merged_spans) { +    if (span.tag) { +      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); +    } +  }    return util::make_unique<StyledString>(pool->MakeRef(localized));  } @@ -175,8 +233,7 @@ class Visitor : public RawValueVisitor {          if (sub_visitor.value) {            localized->values[i] = std::move(sub_visitor.item);          } else { -          localized->values[i] = -              std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); +          localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));          }        }      } @@ -210,8 +267,7 @@ class Visitor : public RawValueVisitor {      }      result += localizer_.End(); -    std::unique_ptr<String> localized = -        util::make_unique<String>(pool_->MakeRef(result)); +    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));      localized->SetSource(string->GetSource());      localized->SetWeak(true);      item = std::move(localized); @@ -282,14 +338,10 @@ void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,    }  } -/** - * A value is pseudolocalizable if it does not define a locale (or is the - * default locale) - * and is translatable. - */ +// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is +// translatable.  
static bool IsPseudolocalizable(ResourceConfigValue* config_value) { -  const int diff = -      config_value->config.diff(ConfigDescription::DefaultConfig()); +  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());    if (diff & ConfigDescription::CONFIG_LOCALE) {      return false;    } @@ -298,19 +350,16 @@ static bool IsPseudolocalizable(ResourceConfigValue* config_value) {  }  // namespace -bool PseudolocaleGenerator::Consume(IAaptContext* context, -                                    ResourceTable* table) { +bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {    for (auto& package : table->packages) {      for (auto& type : package->types) {        for (auto& entry : type->entries) { -        std::vector<ResourceConfigValue*> values = -            entry->FindValuesIf(IsPseudolocalizable); - +        std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);          for (ResourceConfigValue* value : values) { -          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, -                                 &table->string_pool, entry.get()); -          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, -                                 &table->string_pool, entry.get()); +          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, +                                 entry.get()); +          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, +                                 entry.get());          }        }      } diff --git a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp index 4db37db55eb7..b08e1dab35a9 100644 --- a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp +++ b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp @@ -25,7 +25,7 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {    StringPool pool;    StyleString 
original_style;    original_style.str = "Hello world!"; -  original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}}; +  original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};    std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(        util::make_unique<StyledString>(pool.MakeRef(original_style)).get(), @@ -34,22 +34,19 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {    EXPECT_EQ(original_style.str, *new_string->value->str);    ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size()); -  EXPECT_EQ(std::string("He").size(), new_string->value->spans[0].first_char); -  EXPECT_EQ(std::string("Hel").size(), new_string->value->spans[0].last_char); -  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); +  EXPECT_EQ(std::string("i"), *new_string->value->spans[0].name); +  EXPECT_EQ(std::u16string(u"H").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"Hello worl").size(), new_string->value->spans[0].last_char); -  EXPECT_EQ(std::string("Hello ").size(), -            new_string->value->spans[1].first_char); -  EXPECT_EQ(std::string("Hello w").size(), -            new_string->value->spans[1].last_char);    EXPECT_EQ(std::string("b"), *new_string->value->spans[1].name); +  EXPECT_EQ(std::u16string(u"He").size(), new_string->value->spans[1].first_char); +  EXPECT_EQ(std::u16string(u"Hel").size(), new_string->value->spans[1].last_char); -  EXPECT_EQ(std::string("H").size(), new_string->value->spans[2].first_char); -  EXPECT_EQ(std::string("Hello worl").size(), -            new_string->value->spans[2].last_char); -  EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name); +  EXPECT_EQ(std::string("b"), *new_string->value->spans[2].name); +  EXPECT_EQ(std::u16string(u"Hello ").size(), new_string->value->spans[2].first_char); +  EXPECT_EQ(std::u16string(u"Hello w").size(), new_string->value->spans[2].last_char); -  
original_style.spans.push_back(Span{"em", 0, 11u}); +  original_style.spans.insert(original_style.spans.begin(), Span{"em", 0, 11u});    new_string = PseudolocalizeStyledString(        util::make_unique<StyledString>(pool.MakeRef(original_style)).get(), @@ -58,23 +55,128 @@ TEST(PseudolocaleGeneratorTest, PseudolocalizeStyledString) {    EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð¡ one two]"), *new_string->value->str);    ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size()); -  EXPECT_EQ(std::string("[Ĥé").size(), new_string->value->spans[0].first_char); -  EXPECT_EQ(std::string("[Ĥéļ").size(), new_string->value->spans[0].last_char); +  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļð").size(), new_string->value->spans[0].last_char); + +  EXPECT_EQ(std::u16string(u"[Ĥ").size(), new_string->value->spans[1].first_char); +  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļ").size(), new_string->value->spans[1].last_char); + +  EXPECT_EQ(std::u16string(u"[Ĥé").size(), new_string->value->spans[2].first_char); +  EXPECT_EQ(std::u16string(u"[Ĥéļ").size(), new_string->value->spans[2].last_char); + +  EXPECT_EQ(std::u16string(u"[Ĥéļļö ").size(), new_string->value->spans[3].first_char); +  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵ").size(), new_string->value->spans[3].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentNestedTags) { +  StringPool pool; +  StyleString original_style; +  original_style.str = "bold"; +  original_style.spans = {Span{"b", 0, 3}, Span{"i", 0, 3}}; + +  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString( +      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(), +      Pseudolocalizer::Method::kAccent, &pool); +  ASSERT_NE(nullptr, new_string); +  ASSERT_EQ(2u, new_string->value->spans.size()); +  EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str); + +  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); 
+  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[0].last_char); + +  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); +  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[1].first_char); +  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentTagsUnsorted) { +  StringPool pool; +  StyleString original_style; +  original_style.str = "bold"; +  original_style.spans = {Span{"i", 2, 3}, Span{"b", 0, 1}}; + +  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString( +      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(), +      Pseudolocalizer::Method::kAccent, &pool); +  ASSERT_NE(nullptr, new_string); +  ASSERT_EQ(2u, new_string->value->spans.size()); +  EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str); + +  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); +  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"[ɓ").size(), new_string->value->spans[0].last_char); + +  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); +  EXPECT_EQ(std::u16string(u"[ɓö").size(), new_string->value->spans[1].first_char); +  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char); +} + +TEST(PseudolocaleGeneratorTest, PseudolocalizeNestedAndAdjacentTags) { +  StringPool pool; +  StyleString original_style; +  original_style.str = "This sentence is not what you think it is at all."; +  original_style.spans = {Span{"b", 16u, 19u}, Span{"em", 29u, 47u}, Span{"i", 38u, 40u}, +                          Span{"b", 44u, 47u}}; + +  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString( +      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(), +      Pseudolocalizer::Method::kAccent, &pool); 
+  ASSERT_NE(nullptr, new_string); +  ASSERT_EQ(4u, new_string->value->spans.size()); +  EXPECT_EQ(std::string( +                "[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļļ. one two three four five six]"), +            *new_string->value->str); + +  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñö").size(), new_string->value->spans[0].last_char); -  EXPECT_EQ(std::string("[Ĥéļļö ").size(), +  EXPECT_EQ(std::string("em"), *new_string->value->spans[1].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû").size(),              new_string->value->spans[1].first_char); -  EXPECT_EQ(std::string("[Ĥéļļö ŵ").size(), +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),              new_string->value->spans[1].last_char); -  EXPECT_EQ(std::string("[Ĥ").size(), new_string->value->spans[2].first_char); -  EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļ").size(), +  EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ").size(), +            new_string->value->spans[2].first_char); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ î").size(),              new_string->value->spans[2].last_char); -  EXPECT_EQ(std::string("[").size(), new_string->value->spans[3].first_char); -  EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð").size(), +  EXPECT_EQ(std::string("b"), *new_string->value->spans[3].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ").size(), +            new_string->value->spans[3].first_char); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),              new_string->value->spans[3].last_char);  } +TEST(PseudolocaleGeneratorTest, PseudolocalizePartsOfString) { +  StringPool pool; +  StyleString 
original_style; +  original_style.str = "This should NOT be pseudolocalized."; +  original_style.spans = {Span{"em", 4u, 14u}, Span{"i", 18u, 33u}}; +  std::unique_ptr<StyledString> original_string = +      util::make_unique<StyledString>(pool.MakeRef(original_style)); +  original_string->untranslatable_sections = {UntranslatableSection{11u, 15u}}; + +  std::unique_ptr<StyledString> new_string = +      PseudolocalizeStyledString(original_string.get(), Pseudolocalizer::Method::kAccent, &pool); +  ASSERT_NE(nullptr, new_string); +  ASSERT_EQ(2u, new_string->value->spans.size()); +  EXPECT_EQ(std::string("[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžéð. one two three four]"), +            *new_string->value->str); + +  EXPECT_EQ(std::string("em"), *new_string->value->spans[0].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš").size(), new_string->value->spans[0].first_char); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NO").size(), new_string->value->spans[0].last_char); + +  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé").size(), new_string->value->spans[1].first_char); +  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžé").size(), +            new_string->value->spans[1].last_char); +} +  TEST(PseudolocaleGeneratorTest, PseudolocalizeOnlyDefaultConfigs) {    std::unique_ptr<ResourceTable> table =        test::ResourceTableBuilder() @@ -138,7 +240,7 @@ TEST(PseudolocaleGeneratorTest, RespectUntranslateableSections) {    {      StyleString original_style;      original_style.str = "Hello world!"; -    original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}}; +    original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};      auto styled_string =          util::make_unique<StyledString>(table->string_pool.MakeRef(original_style)); |