Reland "Do not create 4-byte sequences in `ConvertUtf16ToModifiedUtf8()`"
This encoding was different from the encoding in dex files
and this caused several issues for classes with descriptors
containing at least one character outside the BMP plane.
For example `Class.forName(nonBmpClass.getName())` would
fail for such classes.
This reverts commit 1b9d442dc906d0158300c5178683f417fa59b026.
Reason for revert: Reland after
https://android-review.googlesource.com/1771690 .
This change no longer affects the operation of JNI functions
dealing with String "UTF" encoding, documented as Modified
UTF-8 but producing an odd variation between Modified UTF-8
and normal UTF-8 in ART due to historical reasons.
Test: New test 183-non-bmp-package-name
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 192935764
Bug: 238888095
Change-Id: I7191e030c7b69e4ea6848fe82d0625683a9331d9
diff --git a/libdexfile/dex/utf.cc b/libdexfile/dex/utf.cc
index 9692a26..ee55a09 100644
--- a/libdexfile/dex/utf.cc
+++ b/libdexfile/dex/utf.cc
@@ -121,8 +121,10 @@
}
}
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
- const uint16_t* utf16_in, size_t char_count) {
+void ConvertUtf16ToModifiedUtf8(char* utf8_out,
+ size_t byte_count,
+ const uint16_t* utf16_in,
+ size_t char_count) {
if (LIKELY(byte_count == char_count)) {
// Common case where all characters are ASCII.
const uint16_t *utf16_end = utf16_in + char_count;
@@ -133,10 +135,9 @@
}
// String contains non-ASCII characters.
- // FIXME: We should not emit 4-byte sequences. Bug: 192935764
auto append = [&](char c) { *utf8_out++ = c; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ true,
+ /*kUse4ByteSequence=*/ false,
/*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append);
}
@@ -207,11 +208,10 @@
}
size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) {
- // FIXME: We should not emit 4-byte sequences. Bug: 192935764
size_t result = 0;
auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ true,
+ /*kUse4ByteSequence=*/ false,
/*kReplaceBadSurrogates=*/ false>(chars, char_count, append);
return result;
}
diff --git a/libdexfile/dex/utf.h b/libdexfile/dex/utf.h
index d372bff..86ca211 100644
--- a/libdexfile/dex/utf.h
+++ b/libdexfile/dex/utf.h
@@ -90,8 +90,10 @@
* this anyway, so if you want a NUL-terminated string, you know where to
* put the NUL byte.
*/
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
- const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out,
+ size_t byte_count,
+ const uint16_t* utf16_in,
+ size_t char_count);
/*
* The java.lang.String hashCode() algorithm.
diff --git a/libdexfile/dex/utf_test.cc b/libdexfile/dex/utf_test.cc
index 85c74d2..acf217f 100644
--- a/libdexfile/dex/utf_test.cc
+++ b/libdexfile/dex/utf_test.cc
@@ -126,8 +126,8 @@
}
TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
- // Surrogate pairs will be converted into 4 byte sequences.
- AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 });
+ // Surrogate pairs will be converted into two three-byte sequences.
+ AssertConversion({ 0xd801, 0xdc00 }, { 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80 });
// Three byte encodings that are below & above the leading surrogate
// range respectively.
@@ -143,12 +143,12 @@
AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
AssertConversion({
- 0xd802, 0xdc02, // Surrogate pair.
+ 0xd802, 0xdc02, // Surrogate pair - three byte encodings.
0xdef0, 0xdcff, // Three byte encodings.
0x0101, 0x0000, // Two byte encodings.
'p' , 'p' // One byte encoding.
}, {
- 0xf0, 0x90, 0xa0, 0x82,
+ 0xed, 0xa0, 0x82, 0xed, 0xb0, 0x82,
0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
0xc4, 0x81, 0xc0, 0x80,
0x70, 0x70
@@ -235,25 +235,6 @@
const uint16_t ch = *chars++;
if (ch > 0 && ch <= 0x7f) {
++result;
- } else if (ch >= 0xd800 && ch <= 0xdbff) {
- if (char_count > 0) {
- const uint16_t ch2 = *chars;
- // If we find a properly paired surrogate, we emit it as a 4 byte
- // UTF sequence. If we find an unpaired leading or trailing surrogate,
- // we emit it as a 3 byte sequence like would have done earlier.
- if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
- chars++;
- char_count--;
-
- result += 4;
- } else {
- result += 3;
- }
- } else {
- // This implies we found an unpaired trailing surrogate at the end
- // of a string.
- result += 3;
- }
} else if (ch > 0x7ff) {
result += 3;
} else {
@@ -270,28 +251,6 @@
if (ch > 0 && ch <= 0x7f) {
*utf8_out++ = ch;
} else {
- // Char_count == 0 here implies we've encountered an unpaired
- // surrogate and we have no choice but to encode it as 3-byte UTF
- // sequence. Note that unpaired surrogates can occur as a part of
- // "normal" operation.
- if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
- const uint16_t ch2 = *utf16_in;
-
- // Check if the other half of the pair is within the expected
- // range. If it isn't, we will have to emit both "halves" as
- // separate 3 byte sequences.
- if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
- utf16_in++;
- char_count--;
- const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
- *utf8_out++ = (code_point >> 18) | 0xf0;
- *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
- *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
- *utf8_out++ = (code_point & 0x3f) | 0x80;
- continue;
- }
- }
-
if (ch > 0x07ff) {
// Three byte encoding.
*utf8_out++ = (ch >> 12) | 0xe0;
@@ -313,39 +272,40 @@
second = (code_point & 0x03ff) + 0xdc00;
}
-static void testConversions(uint16_t *buf, int char_count) {
- char bytes_test[8] = { 0 }, bytes_reference[8] = { 0 };
- uint16_t out_buf_test[4] = { 0 }, out_buf_reference[4] = { 0 };
- int byte_count_test, byte_count_reference;
- int char_count_test, char_count_reference;
-
+static void testConversions(uint16_t *buf, size_t char_count) {
// Calculate the number of utf-8 bytes for the utf-16 chars.
- byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count);
- byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count);
- EXPECT_EQ(byte_count_reference, byte_count_test);
+ size_t byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count);
+ size_t byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count);
+ ASSERT_EQ(byte_count_reference, byte_count_test);
// Convert the utf-16 string to utf-8 bytes.
+ char bytes_test[8], bytes_reference[9];
+ ASSERT_LT(byte_count_reference, arraysize(bytes_reference));
ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+ ASSERT_LE(byte_count_test, arraysize(bytes_test));
ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
- for (int i = 0; i < byte_count_test; ++i) {
- EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+ for (size_t i = 0; i < byte_count_test; ++i) {
+ ASSERT_EQ(bytes_reference[i], bytes_test[i]);
}
// Calculate the number of utf-16 chars from the utf-8 bytes.
bytes_reference[byte_count_reference] = 0; // Reference function needs null termination.
- char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
- char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
- EXPECT_EQ(char_count, char_count_reference);
- EXPECT_EQ(char_count, char_count_test);
+ size_t char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+ size_t char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+ ASSERT_EQ(char_count, char_count_reference);
+ ASSERT_EQ(char_count, char_count_test);
// Convert the utf-8 bytes back to utf-16 chars.
// Does not need copied _reference version of the function because the original
// function with the old API is retained for debug/testing code.
+ uint16_t out_buf_test[4], out_buf_reference[4];
+ ASSERT_LE(char_count_reference, arraysize(out_buf_reference));
ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+ ASSERT_LE(char_count_test, arraysize(out_buf_test));
ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
- for (int i = 0; i < char_count_test; ++i) {
- EXPECT_EQ(buf[i], out_buf_reference[i]);
- EXPECT_EQ(buf[i], out_buf_test[i]);
+ for (size_t i = 0; i < char_count_test; ++i) {
+ ASSERT_EQ(buf[i], out_buf_reference[i]);
+ ASSERT_EQ(buf[i], out_buf_test[i]);
}
}
diff --git a/test/183-non-bmp-package-name/expected-stderr.txt b/test/183-non-bmp-package-name/expected-stderr.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/183-non-bmp-package-name/expected-stderr.txt
diff --git a/test/183-non-bmp-package-name/expected-stdout.txt b/test/183-non-bmp-package-name/expected-stdout.txt
new file mode 100644
index 0000000..9058e58
--- /dev/null
+++ b/test/183-non-bmp-package-name/expected-stdout.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Invoke public abstract void pkg𐀀.PackageTestInterface.interfaceMethod()
diff --git a/test/183-non-bmp-package-name/info.txt b/test/183-non-bmp-package-name/info.txt
new file mode 100644
index 0000000..8b15b5a
--- /dev/null
+++ b/test/183-non-bmp-package-name/info.txt
@@ -0,0 +1,7 @@
+Regression test for bad handling of package name containing a character outside
+the BMP plane. For a proxy class with a non-public interface in such a package,
+this caused the package name comparison to fail because the dex file encoding
+had two three-byte sequences while the descriptor was encoded with a four-byte
+sequence, leading to IllegalArgumentException when calling a proxy method via
+the interface.
+Bug: 192935764
diff --git a/test/183-non-bmp-package-name/jni_find_class.cc b/test/183-non-bmp-package-name/jni_find_class.cc
new file mode 100644
index 0000000..8a9e967
--- /dev/null
+++ b/test/183-non-bmp-package-name/jni_find_class.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "jni.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-current-inl.h"
+
+namespace art {
+
+extern "C" JNIEXPORT jclass JNICALL Java_Main_jniFindClass(
+ JNIEnv* env, jclass, jstring class_name) {
+ CHECK(class_name != nullptr);
+ // FIXME: We should test consistency with `env->GetStringUTFChars(...)` here
+ // but the JNI uses the wrong encoding. Bug: 238888095
+ if ((true)) {
+ ScopedObjectAccess soa(Thread::Current());
+ std::string name = soa.Decode<mirror::String>(class_name)->ToModifiedUtf8();
+ jclass clazz = env->FindClass(name.c_str());
+ return clazz;
+ } else {
+ const char* name = env->GetStringUTFChars(class_name, nullptr);
+ CHECK(name != nullptr);
+ jclass clazz = env->FindClass(name);
+ env->ReleaseStringUTFChars(class_name, name);
+ return clazz;
+ }
+}
+
+} // namespace art
+
diff --git a/test/183-non-bmp-package-name/src/Main.java b/test/183-non-bmp-package-name/src/Main.java
new file mode 100644
index 0000000..973da97
--- /dev/null
+++ b/test/183-non-bmp-package-name/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import pkg𐀀.PackageTest;
+
+public class Main {
+ public static void main(String[] args) {
+ System.loadLibrary(args[0]);
+
+ PackageTest.main();
+ }
+
+ public static native Class<?> jniFindClass(String className);
+}
diff --git "a/test/183-non-bmp-package-name/src/pkg\360\220\200\200/PackageTest.java" "b/test/183-non-bmp-package-name/src/pkg\360\220\200\200/PackageTest.java"
new file mode 100644
index 0000000..2b3b968
--- /dev/null
+++ "b/test/183-non-bmp-package-name/src/pkg\360\220\200\200/PackageTest.java"
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package pkg𐀀;
+
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+
+public class PackageTest {
+ public static void main() {
+ testJniFindClass();
+ testClassForName();
+ testProxy();
+ }
+
+ private static void testJniFindClass() {
+ try {
+ // Cannot import "Main". Use reflection to call `Main.jniFindClass()`.
+ Class<?> mainClass = Class.forName("Main");
+ Method jniFindClass = mainClass.getMethod("jniFindClass", String.class);
+ Class<?> klass = (Class<?>) jniFindClass.invoke(null, "pkg𐀀/PackageTest");
+ if (klass != PackageTest.class) {
+ System.out.println("Unexpected class: " + klass);
+ }
+ } catch (InvocationTargetException ite) {
+ ite.getCause().printStackTrace(System.out); // Unwrap the cause.
+ } catch (Throwable t) {
+ t.printStackTrace(System.out);
+ }
+ }
+
+ private static void testClassForName() {
+ try {
+ Class<?> klass = Class.forName("pkg𐀀.PackageTest");
+ if (klass != PackageTest.class) {
+ System.out.println("Unexpected class: " + klass);
+ }
+ } catch (Throwable t) {
+ t.printStackTrace(System.out);
+ }
+ }
+
+ private static void testProxy() {
+ try {
+ InvocationHandler handler = new PackageInvocationHandler();
+ Class<?> proxyClass = Proxy.getProxyClass(
+ PackageTestInterface.class.getClassLoader(), PackageTestInterface.class);
+ Constructor<?> ctor = proxyClass.getConstructor(InvocationHandler.class);
+ Object proxy = ctor.newInstance(handler);
+ PackageTestInterface asInterface = (PackageTestInterface) proxy;
+ asInterface.interfaceMethod();
+ } catch (Throwable t) {
+ t.printStackTrace(System.out);
+ }
+ }
+}
+
+interface PackageTestInterface {
+ public void interfaceMethod();
+}
+
+class PackageInvocationHandler implements InvocationHandler {
+ public Object invoke(Object proxy, Method method, Object[] args) {
+ System.out.println("Invoke " + method);
+ return null;
+ }
+}
diff --git a/test/Android.bp b/test/Android.bp
index c5322cb..4111009 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -938,6 +938,7 @@
"177-visibly-initialized-deadlock/visibly_initialized.cc",
"178-app-image-native-method/native_methods.cc",
"179-nonvirtual-jni/nonvirtual-call.cc",
+ "183-non-bmp-package-name/jni_find_class.cc",
"1945-proxy-method-arguments/get_args.cc",
"203-multi-checkpoint/multi_checkpoint.cc",
"305-other-fault-handler/fault_handler.cc",
diff --git a/test/knownfailures.json b/test/knownfailures.json
index da318d9..98156f4 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1131,6 +1131,7 @@
},
{
"tests": ["178-app-image-native-method",
+ "183-non-bmp-package-name",
"530-checker-peel-unroll",
"616-cha-unloading",
"674-hiddenapi",
diff --git a/test/run_test_build.py b/test/run_test_build.py
index 75cd64f..b81ef93 100755
--- a/test/run_test_build.py
+++ b/test/run_test_build.py
@@ -102,6 +102,7 @@
"SMALI": args.smali.absolute(),
"SOONG_ZIP": args.soong_zip.absolute(),
"TEST_NAME": self.test_name,
+ "LANG": "en_US.UTF-8", # Needed to pass UTF-8 command line arguments.
}
def bash(self, cmd):
diff --git a/test/ti-agent/ti_utf.h b/test/ti-agent/ti_utf.h
index 15fe22c..a585b6e 100644
--- a/test/ti-agent/ti_utf.h
+++ b/test/ti-agent/ti_utf.h
@@ -169,19 +169,17 @@
}
// String contains non-ASCII characters.
- // FIXME: We should not emit 4-byte sequences. Bug: 192935764
auto append = [&](char c) { *utf8_out++ = c; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ true,
+ /*kUse4ByteSequence=*/ false,
/*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append);
}
inline size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) {
- // FIXME: We should not emit 4-byte sequences. Bug: 192935764
size_t result = 0;
auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ true,
+ /*kUse4ByteSequence=*/ false,
/*kReplaceBadSurrogates=*/ false>(chars, char_count, append);
return result;
}
diff --git a/test/utils/regen-test-files b/test/utils/regen-test-files
index 5b72479..7b43d4a 100755
--- a/test/utils/regen-test-files
+++ b/test/utils/regen-test-files
@@ -399,6 +399,9 @@
# Skip test with a copy of `sun.misc.Unsafe`.
if os.path.isfile(os.path.join(run_test_path, "src", "sun", "misc", "Unsafe.java")):
return False
+ # Ignore test with a non-ascii package name `pkg𐀀`. b/193141629
+ if os.path.isdir(os.path.join(run_test_path, "src", "pkg𐀀")):
+ return False
# Skip tests with Hidden API specs.
if os.path.isfile(os.path.join(run_test_path, "hiddenapi-flags.csv")):
return False