diff options
author | 2023-01-11 00:45:46 +0000 | |
---|---|---|
committer | 2023-01-11 00:45:46 +0000 | |
commit | 6183e758e124f2a96b3f52a3ec27c00f289ec464 (patch) | |
tree | 5c60c24004ebac413aa478dd3b3f18b5704457c8 | |
parent | 5baa7724fb6ec13674678275ba70b30854a80477 (diff) |
Revert "Reland^2 "Do not create 4-byte sequences in `ConvertUtf16ToModifiedUtf8()`""
This reverts commit 5baa7724fb6ec13674678275ba70b30854a80477.
Reason for revert: Still seems to be triggering test failures. E.g. https://ci.chromium.org/ui/p/art/builders/ci/host-x86_64-cdex-fast/5926/overview
Change-Id: I94ca1dd3bb21b25f94a9a563adc549de55670a80
-rw-r--r-- | libdexfile/dex/utf.cc | 12 | ||||
-rw-r--r-- | libdexfile/dex/utf.h | 6 | ||||
-rw-r--r-- | libdexfile/dex/utf_test.cc | 86 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/expected-stderr.txt | 0 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/expected-stdout.txt | 2 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/info.txt | 7 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/jni_find_class.cc | 44 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/src/Main.java | 27 | ||||
-rw-r--r-- | test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java | 83 | ||||
-rw-r--r-- | test/Android.bp | 1 | ||||
-rw-r--r-- | test/knownfailures.json | 1 | ||||
-rwxr-xr-x | test/run_test_build.py | 6 | ||||
-rw-r--r-- | test/ti-agent/ti_utf.h | 6 | ||||
-rwxr-xr-x | test/utils/regen-test-files | 3 |
14 files changed, 76 insertions, 208 deletions
diff --git a/libdexfile/dex/utf.cc b/libdexfile/dex/utf.cc index ee55a09f32..9692a26827 100644 --- a/libdexfile/dex/utf.cc +++ b/libdexfile/dex/utf.cc @@ -121,10 +121,8 @@ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars, } } -void ConvertUtf16ToModifiedUtf8(char* utf8_out, - size_t byte_count, - const uint16_t* utf16_in, - size_t char_count) { +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count) { if (LIKELY(byte_count == char_count)) { // Common case where all characters are ASCII. const uint16_t *utf16_end = utf16_in + char_count; @@ -135,9 +133,10 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, } // String contains non-ASCII characters. + // FIXME: We should not emit 4-byte sequences. Bug: 192935764 auto append = [&](char c) { *utf8_out++ = c; }; ConvertUtf16ToUtf8</*kUseShortZero=*/ false, - /*kUse4ByteSequence=*/ false, + /*kUse4ByteSequence=*/ true, /*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append); } @@ -208,10 +207,11 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t } size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) { + // FIXME: We should not emit 4-byte sequences. Bug: 192935764 size_t result = 0; auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; }; ConvertUtf16ToUtf8</*kUseShortZero=*/ false, - /*kUse4ByteSequence=*/ false, + /*kUse4ByteSequence=*/ true, /*kReplaceBadSurrogates=*/ false>(chars, char_count, append); return result; } diff --git a/libdexfile/dex/utf.h b/libdexfile/dex/utf.h index 86ca211450..d372bff662 100644 --- a/libdexfile/dex/utf.h +++ b/libdexfile/dex/utf.h @@ -90,10 +90,8 @@ size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count); * this anyway, so if you want a NUL-terminated string, you know where to * put the NUL byte. */ -void ConvertUtf16ToModifiedUtf8(char* utf8_out, - size_t byte_count, - const uint16_t* utf16_in, - size_t char_count); +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count); /* * The java.lang.String hashCode() algorithm. diff --git a/libdexfile/dex/utf_test.cc b/libdexfile/dex/utf_test.cc index acf217f0c1..85c74d285c 100644 --- a/libdexfile/dex/utf_test.cc +++ b/libdexfile/dex/utf_test.cc @@ -126,8 +126,8 @@ static void AssertConversion(const std::vector<uint16_t>& input, } TEST_F(UtfTest, CountAndConvertUtf8Bytes) { - // Surrogate pairs will be converted into two three-byte sequences. - AssertConversion({ 0xd801, 0xdc00 }, { 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80 }); + // Surrogate pairs will be converted into 4 byte sequences. + AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 }); // Three byte encodings that are below & above the leading surrogate // range respectively. @@ -143,12 +143,12 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes) { AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f }); AssertConversion({ - 0xd802, 0xdc02, // Surrogate pair - three byte encodings. + 0xd802, 0xdc02, // Surrogate pair. 0xdef0, 0xdcff, // Three byte encodings. 0x0101, 0x0000, // Two byte encodings. 'p' , 'p' // One byte encoding. }, { - 0xed, 0xa0, 0x82, 0xed, 0xb0, 0x82, + 0xf0, 0x90, 0xa0, 0x82, 0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf, 0xc4, 0x81, 0xc0, 0x80, 0x70, 0x70 @@ -235,6 +235,25 @@ static size_t CountModifiedUtf8BytesInUtf16_reference(const uint16_t* chars, siz const uint16_t ch = *chars++; if (ch > 0 && ch <= 0x7f) { ++result; + } else if (ch >= 0xd800 && ch <= 0xdbff) { + if (char_count > 0) { + const uint16_t ch2 = *chars; + // If we find a properly paired surrogate, we emit it as a 4 byte + // UTF sequence. If we find an unpaired leading or trailing surrogate, + // we emit it as a 3 byte sequence like would have done earlier. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + chars++; + char_count--; + + result += 4; + } else { + result += 3; + } + } else { + // This implies we found an unpaired trailing surrogate at the end + // of a string. + result += 3; + } } else if (ch > 0x7ff) { result += 3; } else { @@ -251,6 +270,28 @@ static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* if (ch > 0 && ch <= 0x7f) { *utf8_out++ = ch; } else { + // Char_count == 0 here implies we've encountered an unpaired + // surrogate and we have no choice but to encode it as 3-byte UTF + // sequence. Note that unpaired surrogates can occur as a part of + // "normal" operation. + if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) { + const uint16_t ch2 = *utf16_in; + + // Check if the other half of the pair is within the expected + // range. If it isn't, we will have to emit both "halves" as + // separate 3 byte sequences. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + utf16_in++; + char_count--; + const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00; + *utf8_out++ = (code_point >> 18) | 0xf0; + *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80; + *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80; + *utf8_out++ = (code_point & 0x3f) | 0x80; + continue; + } + } + if (ch > 0x07ff) { // Three byte encoding. *utf8_out++ = (ch >> 12) | 0xe0; @@ -272,40 +313,39 @@ static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint1 second = (code_point & 0x03ff) + 0xdc00; } -static void testConversions(uint16_t *buf, size_t char_count) { +static void testConversions(uint16_t *buf, int char_count) { + char bytes_test[8] = { 0 }, bytes_reference[8] = { 0 }; + uint16_t out_buf_test[4] = { 0 }, out_buf_reference[4] = { 0 }; + int byte_count_test, byte_count_reference; + int char_count_test, char_count_reference; + // Calculate the number of utf-8 bytes for the utf-16 chars. - size_t byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count); - size_t byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count); - ASSERT_EQ(byte_count_reference, byte_count_test); + byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count); + byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count); + EXPECT_EQ(byte_count_reference, byte_count_test); // Convert the utf-16 string to utf-8 bytes. - char bytes_test[8], bytes_reference[9]; - ASSERT_LT(byte_count_reference, arraysize(bytes_reference)); ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count); - ASSERT_LE(byte_count_test, arraysize(bytes_test)); ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count); - for (size_t i = 0; i < byte_count_test; ++i) { - ASSERT_EQ(bytes_reference[i], bytes_test[i]); + for (int i = 0; i < byte_count_test; ++i) { + EXPECT_EQ(bytes_reference[i], bytes_test[i]); } // Calculate the number of utf-16 chars from the utf-8 bytes. bytes_reference[byte_count_reference] = 0; // Reference function needs null termination. - size_t char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference); - size_t char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test); - ASSERT_EQ(char_count, char_count_reference); - ASSERT_EQ(char_count, char_count_test); + char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference); + char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test); + EXPECT_EQ(char_count, char_count_reference); + EXPECT_EQ(char_count, char_count_test); // Convert the utf-8 bytes back to utf-16 chars. // Does not need copied _reference version of the function because the original // function with the old API is retained for debug/testing code. - uint16_t out_buf_test[4], out_buf_reference[4]; - ASSERT_LE(char_count_reference, arraysize(out_buf_reference)); ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference); - ASSERT_LE(char_count_test, arraysize(out_buf_test)); ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test); - for (size_t i = 0; i < char_count_test; ++i) { - ASSERT_EQ(buf[i], out_buf_reference[i]); - ASSERT_EQ(buf[i], out_buf_test[i]); + for (int i = 0; i < char_count_test; ++i) { + EXPECT_EQ(buf[i], out_buf_reference[i]); + EXPECT_EQ(buf[i], out_buf_test[i]); } } diff --git a/test/183-non-bmp-package-name/expected-stderr.txt b/test/183-non-bmp-package-name/expected-stderr.txt deleted file mode 100644 index e69de29bb2..0000000000 --- a/test/183-non-bmp-package-name/expected-stderr.txt +++ /dev/null diff --git a/test/183-non-bmp-package-name/expected-stdout.txt b/test/183-non-bmp-package-name/expected-stdout.txt deleted file mode 100644 index 9058e58d7f..0000000000 --- a/test/183-non-bmp-package-name/expected-stdout.txt +++ /dev/null @@ -1,2 +0,0 @@ -JNI_OnLoad called -Invoke public abstract void pkg𐀀.PackageTestInterface.interfaceMethod() diff --git a/test/183-non-bmp-package-name/info.txt b/test/183-non-bmp-package-name/info.txt deleted file mode 100644 index 8b15b5ac3c..0000000000 --- a/test/183-non-bmp-package-name/info.txt +++ /dev/null @@ -1,7 +0,0 @@ -Regression test for bad handling of package name containing a character outside -the BMP plane. For a proxy class with a non-public interface in such a package, -this caused the package name comparison to fail because the dex file encoding -had two three-byte sequences while the descriptor was encoded with a four-byte -sequence, leading to IllegalArgumentException when calling a proxy method via -the interface. -Bug: 192935764 diff --git a/test/183-non-bmp-package-name/jni_find_class.cc b/test/183-non-bmp-package-name/jni_find_class.cc deleted file mode 100644 index 8a9e967277..0000000000 --- a/test/183-non-bmp-package-name/jni_find_class.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2022 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "base/logging.h" -#include "jni.h" -#include "scoped_thread_state_change-inl.h" -#include "thread-current-inl.h" - -namespace art { - -extern "C" JNIEXPORT jclass JNICALL Java_Main_jniFindClass( - JNIEnv* env, jclass, jstring class_name) { - CHECK(class_name != nullptr); - // FIXME: We should test consistency with `env->GetStringUTFChars(...)` here - // but the JNI uses the wrong encoding. Bug: 238888095 - if ((true)) { - ScopedObjectAccess soa(Thread::Current()); - std::string name = soa.Decode<mirror::String>(class_name)->ToModifiedUtf8(); - jclass clazz = env->FindClass(name.c_str()); - return clazz; - } else { - const char* name = env->GetStringUTFChars(class_name, nullptr); - CHECK(name != nullptr); - jclass clazz = env->FindClass(name); - env->ReleaseStringUTFChars(class_name, name); - return clazz; - } -} - -} // namespace art - diff --git a/test/183-non-bmp-package-name/src/Main.java b/test/183-non-bmp-package-name/src/Main.java deleted file mode 100644 index 973da97cb8..0000000000 --- a/test/183-non-bmp-package-name/src/Main.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2022 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import pkg𐀀.PackageTest; - -public class Main { - public static void main(String[] args) { - System.loadLibrary(args[0]); - - PackageTest.main(); - } - - public static native Class<?> jniFindClass(String className); -} diff --git a/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java b/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java deleted file mode 100644 index 2b3b968c65..0000000000 --- a/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2022 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package pkg𐀀; - -import java.lang.reflect.InvocationHandler; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Constructor; -import java.lang.reflect.Method; -import java.lang.reflect.Proxy; - -public class PackageTest { - public static void main() { - testJniFindClass(); - testClassForName(); - testProxy(); - } - - private static void testJniFindClass() { - try { - // Cannot import "Main". Use reflection to call `Main.jniFindClass()`. - Class<?> mainClass = Class.forName("Main"); - Method jniFindClass = mainClass.getMethod("jniFindClass", String.class); - Class<?> klass = (Class<?>) jniFindClass.invoke(null, "pkg𐀀/PackageTest"); - if (klass != PackageTest.class) { - System.out.println("Unexpected class: " + klass); - } - } catch (InvocationTargetException ite) { - ite.getCause().printStackTrace(System.out); // Unwrap the cause. - } catch (Throwable t) { - t.printStackTrace(System.out); - } - } - - private static void testClassForName() { - try { - Class<?> klass = Class.forName("pkg𐀀.PackageTest"); - if (klass != PackageTest.class) { - System.out.println("Unexpected class: " + klass); - } - } catch (Throwable t) { - t.printStackTrace(System.out); - } - } - - private static void testProxy() { - try { - InvocationHandler handler = new PackageInvocationHandler(); - Class<?> proxyClass = Proxy.getProxyClass( - PackageTestInterface.class.getClassLoader(), PackageTestInterface.class); - Constructor<?> ctor = proxyClass.getConstructor(InvocationHandler.class); - Object proxy = ctor.newInstance(handler); - PackageTestInterface asInterface = (PackageTestInterface) proxy; - asInterface.interfaceMethod(); - } catch (Throwable t) { - t.printStackTrace(System.out); - } - } -} - -interface PackageTestInterface { - public void interfaceMethod(); -} - -class PackageInvocationHandler implements InvocationHandler { - public Object invoke(Object proxy, Method method, Object[] args) { - System.out.println("Invoke " + method); - return null; - } -} diff --git a/test/Android.bp b/test/Android.bp index 4111009303..c5322cbd72 100644 --- a/test/Android.bp +++ b/test/Android.bp @@ -938,7 +938,6 @@ cc_defaults { "177-visibly-initialized-deadlock/visibly_initialized.cc", "178-app-image-native-method/native_methods.cc", "179-nonvirtual-jni/nonvirtual-call.cc", - "183-non-bmp-package-name/jni_find_class.cc", "1945-proxy-method-arguments/get_args.cc", "203-multi-checkpoint/multi_checkpoint.cc", "305-other-fault-handler/fault_handler.cc", diff --git a/test/knownfailures.json b/test/knownfailures.json index 98156f4f2f..da318d9b8c 100644 --- a/test/knownfailures.json +++ b/test/knownfailures.json @@ -1131,7 +1131,6 @@ }, { "tests": ["178-app-image-native-method", - "183-non-bmp-package-name", "530-checker-peel-unroll", "616-cha-unloading", "674-hiddenapi", diff --git a/test/run_test_build.py b/test/run_test_build.py index fcaf1a54dc..75cd64f061 100755 --- a/test/run_test_build.py +++ b/test/run_test_build.py @@ -81,10 +81,7 @@ class BuildTestContext: self.hiddenapi = functools.partial(self.run, args.hiddenapi.absolute()) # RBE wrapper for some of the tools. - # RBE does not suport non-BMP characters in paths, so exclude "183-non-bmp-package-name". - # Bug: 264866780 - if ("RBE_server_address" in os.environ and USE_RBE > (hash(self.test_name) % 100) and - self.test_name != "183-non-bmp-package-name"): + if "RBE_server_address" in os.environ and USE_RBE > (hash(self.test_name) % 100): self.rbe_exec_root = os.environ.get("RBE_exec_root") self.rbe_rewrapper = self.android_build_top / "prebuilts/remoteexecution-client/live/rewrapper" if self.test_name not in RBE_D8_DISABLED_FOR: @@ -105,7 +102,6 @@ class BuildTestContext: "SMALI": args.smali.absolute(), "SOONG_ZIP": args.soong_zip.absolute(), "TEST_NAME": self.test_name, - "LANG": "en_US.UTF-8", # Needed to pass UTF-8 command line arguments. } def bash(self, cmd): diff --git a/test/ti-agent/ti_utf.h b/test/ti-agent/ti_utf.h index a585b6e5c9..15fe22ce5a 100644 --- a/test/ti-agent/ti_utf.h +++ b/test/ti-agent/ti_utf.h @@ -169,17 +169,19 @@ inline void ConvertUtf16ToModifiedUtf8(char* utf8_out, } // String contains non-ASCII characters. + // FIXME: We should not emit 4-byte sequences. Bug: 192935764 auto append = [&](char c) { *utf8_out++ = c; }; ConvertUtf16ToUtf8</*kUseShortZero=*/ false, - /*kUse4ByteSequence=*/ false, + /*kUse4ByteSequence=*/ true, /*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append); } inline size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) { + // FIXME: We should not emit 4-byte sequences. Bug: 192935764 size_t result = 0; auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; }; ConvertUtf16ToUtf8</*kUseShortZero=*/ false, - /*kUse4ByteSequence=*/ false, + /*kUse4ByteSequence=*/ true, /*kReplaceBadSurrogates=*/ false>(chars, char_count, append); return result; } diff --git a/test/utils/regen-test-files b/test/utils/regen-test-files index 7b43d4ae3d..5b7247997d 100755 --- a/test/utils/regen-test-files +++ b/test/utils/regen-test-files @@ -399,9 +399,6 @@ class Generator: # Skip test with a copy of `sun.misc.Unsafe`. if os.path.isfile(os.path.join(run_test_path, "src", "sun", "misc", "Unsafe.java")): return False - # Ignore test with a non-ascii package name `pkg𐀀`. b/193141629 - if os.path.isdir(os.path.join(run_test_path, "src", "pkg𐀀")): - return False # Skip tests with Hidden API specs. if os.path.isfile(os.path.join(run_test_path, "hiddenapi-flags.csv")): return False |