Be more lenient with 4-byte UTF-8 sequences. Accept 4-byte sequences and convert them into surrogate pairs instead of expecting 2 separate 3-byte sequences each encoding one half of a surrogate pair. Note that in addition to supporting 4-byte sequences in strings from JNI, we also tolerate them in dex files. This is mainly for consistency, and there's no need to claim any sort of official support. bug: 18848397 bug: https://code.google.com/p/android/issues/detail?id=81341 Change-Id: Ibc98d29e59d98803e640f2489ea4c56912a59b29

commit: a5afcfc73141e5e378d79a326d02c5c2039fb025 [log] [tgz]
author: Narayan Kamath <narayan@google.com> Thu Jan 29 20:06:46 2015 +0000
committer: Narayan Kamath <narayan@google.com> Thu Feb 12 11:54:37 2015 +0000
tree: 424add9558fb816c4f1d2f4edd128f4f2a086d9a
parent: 5a3399deaf448c8434d9ba0916ff799b1b791d95 [diff] [blame]
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index fb42d28..9b345a6 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc

@@ -67,7 +67,7 @@
     ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) || (expected_utf16_length == 1 && strlen(utf8_in) == 0));
     for (int32_t i = 0; i < expected_utf16_length; i++) {
-      EXPECT_EQ(utf16_expected[i], string->CharAt(i));
+      EXPECT_EQ(utf16_expected[i], string->UncheckedCharAt(i));
     }
     EXPECT_EQ(expected_hash, string->GetHashCode());
   }
@@ -424,6 +424,12 @@
   AssertString(1, "\xe1\x88\xb4",   "\x12\x34",                 0x1234);
   AssertString(1, "\xef\xbf\xbf",   "\xff\xff",                 0xffff);
   AssertString(3, "h\xe1\x88\xb4i", "\x00\x68\x12\x34\x00\x69", (31 * ((31 * 0x68) + 0x1234)) + 0x69);
+
+  // Test four-byte characters.
+  AssertString(2, "\xf0\x9f\x8f\xa0",  "\xd8\x3c\xdf\xe0", (31 * 0xd83c) + 0xdfe0);
+  AssertString(2, "\xf0\x9f\x9a\x80",  "\xd8\x3d\xde\x80", (31 * 0xd83d) + 0xde80);
+  AssertString(4, "h\xf0\x9f\x9a\x80i", "\x00\x68\xd8\x3d\xde\x80\x00\x69",
+               (31 * (31 * (31 * 0x68 +  0xd83d) + 0xde80) + 0x69));
 }
 
 TEST_F(ObjectTest, StringEqualsUtf8) {
commit	a5afcfc73141e5e378d79a326d02c5c2039fb025	[log] [tgz]
author	Narayan Kamath <narayan@google.com>	Thu Jan 29 20:06:46 2015 +0000
committer	Narayan Kamath <narayan@google.com>	Thu Feb 12 11:54:37 2015 +0000
tree	424add9558fb816c4f1d2f4edd128f4f2a086d9a
parent	5a3399deaf448c8434d9ba0916ff799b1b791d95 [diff] [blame]