| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "compile/Pseudolocalizer.h" |
| |
| #include "util/Util.h" |
| |
| using android::StringPiece; |
| |
| using namespace std::literals; |
| |
| namespace aapt { |
| |
// Space-separated word list from which the expansion text appended to
// pseudolocalized messages is generated.
static constexpr std::string_view kExpansionString =
    "one two three "
    "four five six seven eight nine ten eleven twelve thirteen "
    "fourteen fiveteen sixteen seventeen nineteen twenty";
| |
// Unicode directional formatting characters used to force the direction in
// which words are rendered:
//   U+200F RIGHT-TO-LEFT MARK, U+202E RIGHT-TO-LEFT OVERRIDE,
//   U+202C POP DIRECTIONAL FORMATTING.
static constexpr std::string_view kRlm = "\u200f";
static constexpr std::string_view kRlo = "\u202e";
static constexpr std::string_view kPdf = "\u202c";
| |
// Marks written immediately before and after a placeholder:
//   U+00BB (right-pointing double angle quote) opens,
//   U+00AB (left-pointing double angle quote) closes.
static constexpr std::string_view kPlaceholderOpen = "\u00bb";
static constexpr std::string_view kPlaceholderClose = "\u00ab";
| |
// Characters that open and close a brace-delimited argument in a message.
static constexpr char kArgStart{'{'};
static constexpr char kArgEnd{'}'};
| |
| class PseudoMethodNone : public PseudoMethodImpl { |
| public: |
| std::string Text(StringPiece text) override { |
| return std::string(text); |
| } |
| std::string Placeholder(StringPiece text) override { |
| return std::string(text); |
| } |
| }; |
| |
| class PseudoMethodBidi : public PseudoMethodImpl { |
| public: |
| std::string Text(StringPiece text) override; |
| std::string Placeholder(StringPiece text) override; |
| }; |
| |
| class PseudoMethodAccent : public PseudoMethodImpl { |
| public: |
| PseudoMethodAccent() : depth_(0), word_count_(0), length_(0) {} |
| std::string Start() override; |
| std::string End() override; |
| std::string Text(StringPiece text) override; |
| std::string Placeholder(StringPiece text) override; |
| |
| private: |
| size_t depth_; |
| size_t word_count_; |
| size_t length_; |
| }; |
| |
| Pseudolocalizer::Pseudolocalizer(Method method) : last_depth_(0) { |
| SetMethod(method); |
| } |
| |
| void Pseudolocalizer::SetMethod(Method method) { |
| switch (method) { |
| case Method::kNone: |
| impl_ = util::make_unique<PseudoMethodNone>(); |
| break; |
| case Method::kAccent: |
| impl_ = util::make_unique<PseudoMethodAccent>(); |
| break; |
| case Method::kBidi: |
| impl_ = util::make_unique<PseudoMethodBidi>(); |
| break; |
| } |
| } |
| |
| std::string Pseudolocalizer::Text(StringPiece text) { |
| std::string out; |
| size_t depth = last_depth_; |
| size_t lastpos, pos; |
| const size_t length = text.size(); |
| const char* str = text.data(); |
| bool escaped = false; |
| for (lastpos = pos = 0; pos < length; pos++) { |
| char16_t c = str[pos]; |
| if (escaped) { |
| escaped = false; |
| continue; |
| } |
| if (c == '\'') { |
| escaped = true; |
| continue; |
| } |
| |
| if (c == kArgStart) { |
| depth++; |
| } else if (c == kArgEnd && depth) { |
| depth--; |
| } |
| |
| if (last_depth_ != depth || pos == length - 1) { |
| bool pseudo = ((last_depth_ % 2) == 0); |
| size_t nextpos = pos; |
| if (!pseudo || depth == last_depth_) { |
| nextpos++; |
| } |
| size_t size = nextpos - lastpos; |
| if (size) { |
| std::string chunk(text.substr(lastpos, size)); |
| if (pseudo) { |
| chunk = impl_->Text(chunk); |
| } else if (str[lastpos] == kArgStart && str[nextpos - 1] == kArgEnd) { |
| chunk = impl_->Placeholder(chunk); |
| } |
| out.append(chunk); |
| } |
| if (pseudo && depth < last_depth_) { // End of message |
| out.append(impl_->End()); |
| } else if (!pseudo && depth > last_depth_) { // Start of message |
| out.append(impl_->Start()); |
| } |
| lastpos = nextpos; |
| last_depth_ = depth; |
| } |
| } |
| return out; |
| } |
| |
// Returns the replacement string for |c|, or nullptr when |c| has none and
// must be copied through unchanged. Covers 'a'-'z', 'A'-'Z' except 'F',
// and the characters '!', '?' and '$'.
static const char* PseudolocalizeChar(const char c) {
  switch (c) {
    // Lower-case letters.
    case 'a': return "\u00e5";
    case 'b': return "\u0253";
    case 'c': return "\u00e7";
    case 'd': return "\u00f0";
    case 'e': return "\u00e9";
    case 'f': return "\u0192";
    case 'g': return "\u011d";
    case 'h': return "\u0125";
    case 'i': return "\u00ee";
    case 'j': return "\u0135";
    case 'k': return "\u0137";
    case 'l': return "\u013c";
    case 'm': return "\u1e3f";
    case 'n': return "\u00f1";
    case 'o': return "\u00f6";
    case 'p': return "\u00fe";
    case 'q': return "\u0051";
    case 'r': return "\u0155";
    case 's': return "\u0161";
    case 't': return "\u0163";
    case 'u': return "\u00fb";
    case 'v': return "\u0056";
    case 'w': return "\u0175";
    case 'x': return "\u0445";
    case 'y': return "\u00fd";
    case 'z': return "\u017e";

    // Upper-case letters ('F' has no entry).
    case 'A': return "\u00c5";
    case 'B': return "\u03b2";
    case 'C': return "\u00c7";
    case 'D': return "\u00d0";
    case 'E': return "\u00c9";
    case 'G': return "\u011c";
    case 'H': return "\u0124";
    case 'I': return "\u00ce";
    case 'J': return "\u0134";
    case 'K': return "\u0136";
    case 'L': return "\u013b";
    case 'M': return "\u1e3e";
    case 'N': return "\u00d1";
    case 'O': return "\u00d6";
    case 'P': return "\u00de";
    case 'Q': return "\u0071";
    case 'R': return "\u0154";
    case 'S': return "\u0160";
    case 'T': return "\u0162";
    case 'U': return "\u00db";
    case 'V': return "\u03bd";
    case 'W': return "\u0174";
    case 'X': return "\u00d7";
    case 'Y': return "\u00dd";
    case 'Z': return "\u017d";

    // Punctuation and symbols.
    case '!': return "\u00a1";
    case '?': return "\u00bf";
    case '$': return "\u20ac";

    default:
      return nullptr;
  }
}
| |
// Returns true when |c| is one of the characters accepted as the final
// character of a '%' placeholder: s S c C d o x X f e E g G a A b B h H % n.
static bool IsPossibleNormalPlaceholderEnd(const char c) {
  switch (c) {
    case 's': case 'S':
    case 'c': case 'C':
    case 'd':
    case 'o':
    case 'x': case 'X':
    case 'f':
    case 'e': case 'E':
    case 'g': case 'G':
    case 'a': case 'A':
    case 'b': case 'B':
    case 'h': case 'H':
    case '%':
    case 'n':
      return true;
    default:
      return false;
  }
}
| |
| static std::string PseudoGenerateExpansion(const unsigned int length) { |
| std::string result(kExpansionString); |
| if (result.size() < length) { |
| result += " "; |
| result += PseudoGenerateExpansion(length - result.size()); |
| } else { |
| int ext = 0; |
| // Should contain only whole words, so looking for a space |
| { |
| const char* const s = result.data(); |
| for (unsigned int i = length + 1; i < result.size(); ++i) { |
| ++ext; |
| if (s[i] == ' ') { |
| break; |
| } |
| } |
| } |
| result.resize(length + ext); |
| } |
| return result; |
| } |
| |
| std::string PseudoMethodAccent::Start() { |
| std::string result; |
| if (depth_ == 0) { |
| result = "["; |
| } |
| word_count_ = length_ = 0; |
| depth_++; |
| return result; |
| } |
| |
| std::string PseudoMethodAccent::End() { |
| std::string result; |
| if (length_) { |
| result += " "; |
| result += PseudoGenerateExpansion(word_count_ > 3 ? length_ : length_ / 2); |
| } |
| word_count_ = length_ = 0; |
| depth_--; |
| if (depth_ == 0) { |
| result += "]"; |
| } |
| return result; |
| } |
| |
| /** |
| * Converts characters so they look like they've been localized. |
| * |
| * Note: This leaves placeholder syntax untouched. |
| */ |
| std::string PseudoMethodAccent::Text(StringPiece source) { |
| const char* s = source.data(); |
| std::string result; |
| const size_t I = source.size(); |
| bool lastspace = true; |
| for (size_t i = 0; i < I; i++) { |
| char c = s[i]; |
| if (c == '%') { |
| // Placeholder syntax, no need to pseudolocalize |
| std::string chunk; |
| bool end = false; |
| chunk.append(&c, 1); |
| while (!end && i + 1 < I) { |
| ++i; |
| c = s[i]; |
| chunk.append(&c, 1); |
| if (IsPossibleNormalPlaceholderEnd(c)) { |
| end = true; |
| } else if (i + 1 < I && c == 't') { |
| ++i; |
| c = s[i]; |
| chunk.append(&c, 1); |
| end = true; |
| } |
| } |
| // Treat chunk as a placeholder unless it ends with %. |
| result += ((c == '%') ? chunk : Placeholder(chunk)); |
| } else if (c == '<' || c == '&') { |
| // html syntax, no need to pseudolocalize |
| bool tag_closed = false; |
| while (!tag_closed && i < I) { |
| if (c == '&') { |
| std::string escape_text; |
| escape_text.append(&c, 1); |
| bool end = false; |
| size_t html_code_pos = i; |
| while (!end && html_code_pos < I) { |
| ++html_code_pos; |
| c = s[html_code_pos]; |
| escape_text.append(&c, 1); |
| // Valid html code |
| if (c == ';') { |
| end = true; |
| i = html_code_pos; |
| } |
| // Wrong html code |
| else if (!((c == '#' || (c >= 'a' && c <= 'z') || |
| (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')))) { |
| end = true; |
| } |
| } |
| result += escape_text; |
| if (escape_text != "<") { |
| tag_closed = true; |
| } |
| continue; |
| } |
| if (c == '>') { |
| tag_closed = true; |
| result.append(&c, 1); |
| continue; |
| } |
| result.append(&c, 1); |
| i++; |
| c = s[i]; |
| } |
| } else { |
| // This is a pure text that should be pseudolocalized |
| const char* p = PseudolocalizeChar(c); |
| if (p != nullptr) { |
| result += p; |
| } else { |
| bool space = isspace(c); |
| if (lastspace && !space) { |
| word_count_++; |
| } |
| lastspace = space; |
| result.append(&c, 1); |
| } |
| // Count only pseudolocalizable chars and delimiters |
| length_++; |
| } |
| } |
| return result; |
| } |
| |
| std::string PseudoMethodAccent::Placeholder(StringPiece source) { |
| // Surround a placeholder with brackets |
| return (std::string(kPlaceholderOpen) += source) += kPlaceholderClose; |
| } |
| |
| std::string PseudoMethodBidi::Text(StringPiece source) { |
| const char* s = source.data(); |
| std::string result; |
| bool lastspace = true; |
| bool space = true; |
| bool escape = false; |
| const char ESCAPE_CHAR = '\\'; |
| for (size_t i = 0; i < source.size(); i++) { |
| char c = s[i]; |
| if (!escape && c == ESCAPE_CHAR) { |
| escape = true; |
| continue; |
| } |
| space = (!escape && isspace(c)) || (escape && (c == 'n' || c == 't')); |
| if (lastspace && !space) { |
| // Word start |
| (result += kRlm) += kRlo; |
| } else if (!lastspace && space) { |
| // Word end |
| (result += kPdf) += kRlm; |
| } |
| lastspace = space; |
| if (escape) { |
| result.append(&ESCAPE_CHAR, 1); |
| escape=false; |
| } |
| result.append(&c, 1); |
| } |
| if (!lastspace) { |
| // End of last word |
| (result += kPdf) += kRlm; |
| } |
| return result; |
| } |
| |
| std::string PseudoMethodBidi::Placeholder(StringPiece source) { |
| // Surround a placeholder with directionality change sequence |
| return (((std::string(kRlm) += kRlo) += source) += kPdf) += kRlm; |
| } |
| |
| } // namespace aapt |