From d86ea58bddea7d5608e3539fc77e3d805c0af1d1 Mon Sep 17 00:00:00 2001 From: Ryan Mitchell Date: Wed, 27 Jun 2018 11:57:18 -0700 Subject: AAPT2: Encode 4-byte strings in Modified UTF-8 Codepoints that are encoded to 4 bytes in UTF-8 are not allowed in Modified UTF-8. They instead should be encoded as surrogate pairs in the same way that CESU-8 allows for surrogate pairs. This will also cause 4 byte UTF-8 codes to be represented in 6 bytes. Bug: 37140916 Test: aapt2_tests Change-Id: I155dc24f166139d1d36a16bac088dcfcd59eb321 --- tools/aapt2/StringPool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/aapt2/StringPool.cpp') diff --git a/tools/aapt2/StringPool.cpp b/tools/aapt2/StringPool.cpp index b37e1fbd9693..8eabd3225d87 100644 --- a/tools/aapt2/StringPool.cpp +++ b/tools/aapt2/StringPool.cpp @@ -367,7 +367,7 @@ const std::string kStringTooLarge = "STRING_TOO_LARGE"; static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out, IDiagnostics* diag) { if (utf8) { - const std::string& encoded = str; + const std::string& encoded = util::Utf8ToModifiedUtf8(str); const ssize_t utf16_length = utf8_to_utf16_length( reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size()); CHECK(utf16_length >= 0); -- cgit v1.2.3-59-g8ed1b