summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hans Boehm <hboehm@google.com> 2023-01-11 00:45:46 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2023-01-11 00:45:46 +0000
commit6183e758e124f2a96b3f52a3ec27c00f289ec464 (patch)
tree5c60c24004ebac413aa478dd3b3f18b5704457c8
parent5baa7724fb6ec13674678275ba70b30854a80477 (diff)
Revert "Reland^2 "Do not create 4-byte sequences in `ConvertUtf16ToModifiedUtf8()`""
This reverts commit 5baa7724fb6ec13674678275ba70b30854a80477. Reason for revert: Still seems to be triggering test failures. E.g. https://ci.chromium.org/ui/p/art/builders/ci/host-x86_64-cdex-fast/5926/overview Change-Id: I94ca1dd3bb21b25f94a9a563adc549de55670a80
-rw-r--r--libdexfile/dex/utf.cc12
-rw-r--r--libdexfile/dex/utf.h6
-rw-r--r--libdexfile/dex/utf_test.cc86
-rw-r--r--test/183-non-bmp-package-name/expected-stderr.txt0
-rw-r--r--test/183-non-bmp-package-name/expected-stdout.txt2
-rw-r--r--test/183-non-bmp-package-name/info.txt7
-rw-r--r--test/183-non-bmp-package-name/jni_find_class.cc44
-rw-r--r--test/183-non-bmp-package-name/src/Main.java27
-rw-r--r--test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java83
-rw-r--r--test/Android.bp1
-rw-r--r--test/knownfailures.json1
-rwxr-xr-xtest/run_test_build.py6
-rw-r--r--test/ti-agent/ti_utf.h6
-rwxr-xr-xtest/utils/regen-test-files3
14 files changed, 76 insertions, 208 deletions
diff --git a/libdexfile/dex/utf.cc b/libdexfile/dex/utf.cc
index ee55a09f32..9692a26827 100644
--- a/libdexfile/dex/utf.cc
+++ b/libdexfile/dex/utf.cc
@@ -121,10 +121,8 @@ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
}
}
-void ConvertUtf16ToModifiedUtf8(char* utf8_out,
- size_t byte_count,
- const uint16_t* utf16_in,
- size_t char_count) {
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+ const uint16_t* utf16_in, size_t char_count) {
if (LIKELY(byte_count == char_count)) {
// Common case where all characters are ASCII.
const uint16_t *utf16_end = utf16_in + char_count;
@@ -135,9 +133,10 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out,
}
// String contains non-ASCII characters.
+ // FIXME: We should not emit 4-byte sequences. Bug: 192935764
auto append = [&](char c) { *utf8_out++ = c; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ false,
+ /*kUse4ByteSequence=*/ true,
/*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append);
}
@@ -208,10 +207,11 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t
}
size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) {
+ // FIXME: We should not emit 4-byte sequences. Bug: 192935764
size_t result = 0;
auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ false,
+ /*kUse4ByteSequence=*/ true,
/*kReplaceBadSurrogates=*/ false>(chars, char_count, append);
return result;
}
diff --git a/libdexfile/dex/utf.h b/libdexfile/dex/utf.h
index 86ca211450..d372bff662 100644
--- a/libdexfile/dex/utf.h
+++ b/libdexfile/dex/utf.h
@@ -90,10 +90,8 @@ size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count);
* this anyway, so if you want a NUL-terminated string, you know where to
* put the NUL byte.
*/
-void ConvertUtf16ToModifiedUtf8(char* utf8_out,
- size_t byte_count,
- const uint16_t* utf16_in,
- size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+ const uint16_t* utf16_in, size_t char_count);
/*
* The java.lang.String hashCode() algorithm.
diff --git a/libdexfile/dex/utf_test.cc b/libdexfile/dex/utf_test.cc
index acf217f0c1..85c74d285c 100644
--- a/libdexfile/dex/utf_test.cc
+++ b/libdexfile/dex/utf_test.cc
@@ -126,8 +126,8 @@ static void AssertConversion(const std::vector<uint16_t>& input,
}
TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
- // Surrogate pairs will be converted into two three-byte sequences.
- AssertConversion({ 0xd801, 0xdc00 }, { 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80 });
+ // Surrogate pairs will be converted into 4 byte sequences.
+ AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 });
// Three byte encodings that are below & above the leading surrogate
// range respectively.
@@ -143,12 +143,12 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
AssertConversion({
- 0xd802, 0xdc02, // Surrogate pair - three byte encodings.
+ 0xd802, 0xdc02, // Surrogate pair.
0xdef0, 0xdcff, // Three byte encodings.
0x0101, 0x0000, // Two byte encodings.
'p' , 'p' // One byte encoding.
}, {
- 0xed, 0xa0, 0x82, 0xed, 0xb0, 0x82,
+ 0xf0, 0x90, 0xa0, 0x82,
0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
0xc4, 0x81, 0xc0, 0x80,
0x70, 0x70
@@ -235,6 +235,25 @@ static size_t CountModifiedUtf8BytesInUtf16_reference(const uint16_t* chars, siz
const uint16_t ch = *chars++;
if (ch > 0 && ch <= 0x7f) {
++result;
+ } else if (ch >= 0xd800 && ch <= 0xdbff) {
+ if (char_count > 0) {
+ const uint16_t ch2 = *chars;
+ // If we find a properly paired surrogate, we emit it as a 4 byte
+ // UTF sequence. If we find an unpaired leading or trailing surrogate,
+ // we emit it as a 3 byte sequence like would have done earlier.
+ if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+ chars++;
+ char_count--;
+
+ result += 4;
+ } else {
+ result += 3;
+ }
+ } else {
+ // This implies we found an unpaired trailing surrogate at the end
+ // of a string.
+ result += 3;
+ }
} else if (ch > 0x7ff) {
result += 3;
} else {
@@ -251,6 +270,28 @@ static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t*
if (ch > 0 && ch <= 0x7f) {
*utf8_out++ = ch;
} else {
+ // Char_count == 0 here implies we've encountered an unpaired
+ // surrogate and we have no choice but to encode it as 3-byte UTF
+ // sequence. Note that unpaired surrogates can occur as a part of
+ // "normal" operation.
+ if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+ const uint16_t ch2 = *utf16_in;
+
+ // Check if the other half of the pair is within the expected
+ // range. If it isn't, we will have to emit both "halves" as
+ // separate 3 byte sequences.
+ if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+ utf16_in++;
+ char_count--;
+ const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+ *utf8_out++ = (code_point >> 18) | 0xf0;
+ *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+ *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+ *utf8_out++ = (code_point & 0x3f) | 0x80;
+ continue;
+ }
+ }
+
if (ch > 0x07ff) {
// Three byte encoding.
*utf8_out++ = (ch >> 12) | 0xe0;
@@ -272,40 +313,39 @@ static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint1
second = (code_point & 0x03ff) + 0xdc00;
}
-static void testConversions(uint16_t *buf, size_t char_count) {
+static void testConversions(uint16_t *buf, int char_count) {
+ char bytes_test[8] = { 0 }, bytes_reference[8] = { 0 };
+ uint16_t out_buf_test[4] = { 0 }, out_buf_reference[4] = { 0 };
+ int byte_count_test, byte_count_reference;
+ int char_count_test, char_count_reference;
+
// Calculate the number of utf-8 bytes for the utf-16 chars.
- size_t byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count);
- size_t byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count);
- ASSERT_EQ(byte_count_reference, byte_count_test);
+ byte_count_reference = CountModifiedUtf8BytesInUtf16_reference(buf, char_count);
+ byte_count_test = CountModifiedUtf8BytesInUtf16(buf, char_count);
+ EXPECT_EQ(byte_count_reference, byte_count_test);
// Convert the utf-16 string to utf-8 bytes.
- char bytes_test[8], bytes_reference[9];
- ASSERT_LT(byte_count_reference, arraysize(bytes_reference));
ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
- ASSERT_LE(byte_count_test, arraysize(bytes_test));
ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
- for (size_t i = 0; i < byte_count_test; ++i) {
- ASSERT_EQ(bytes_reference[i], bytes_test[i]);
+ for (int i = 0; i < byte_count_test; ++i) {
+ EXPECT_EQ(bytes_reference[i], bytes_test[i]);
}
// Calculate the number of utf-16 chars from the utf-8 bytes.
bytes_reference[byte_count_reference] = 0; // Reference function needs null termination.
- size_t char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
- size_t char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
- ASSERT_EQ(char_count, char_count_reference);
- ASSERT_EQ(char_count, char_count_test);
+ char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+ char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+ EXPECT_EQ(char_count, char_count_reference);
+ EXPECT_EQ(char_count, char_count_test);
// Convert the utf-8 bytes back to utf-16 chars.
// Does not need copied _reference version of the function because the original
// function with the old API is retained for debug/testing code.
- uint16_t out_buf_test[4], out_buf_reference[4];
- ASSERT_LE(char_count_reference, arraysize(out_buf_reference));
ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
- ASSERT_LE(char_count_test, arraysize(out_buf_test));
ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
- for (size_t i = 0; i < char_count_test; ++i) {
- ASSERT_EQ(buf[i], out_buf_reference[i]);
- ASSERT_EQ(buf[i], out_buf_test[i]);
+ for (int i = 0; i < char_count_test; ++i) {
+ EXPECT_EQ(buf[i], out_buf_reference[i]);
+ EXPECT_EQ(buf[i], out_buf_test[i]);
}
}
diff --git a/test/183-non-bmp-package-name/expected-stderr.txt b/test/183-non-bmp-package-name/expected-stderr.txt
deleted file mode 100644
index e69de29bb2..0000000000
--- a/test/183-non-bmp-package-name/expected-stderr.txt
+++ /dev/null
diff --git a/test/183-non-bmp-package-name/expected-stdout.txt b/test/183-non-bmp-package-name/expected-stdout.txt
deleted file mode 100644
index 9058e58d7f..0000000000
--- a/test/183-non-bmp-package-name/expected-stdout.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-JNI_OnLoad called
-Invoke public abstract void pkg𐀀.PackageTestInterface.interfaceMethod()
diff --git a/test/183-non-bmp-package-name/info.txt b/test/183-non-bmp-package-name/info.txt
deleted file mode 100644
index 8b15b5ac3c..0000000000
--- a/test/183-non-bmp-package-name/info.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-Regression test for bad handling of package name containing a character outside
-the BMP plane. For a proxy class with a non-public interface in such a package,
-this caused the package name comparison to fail because the dex file encoding
-had two three-byte sequences while the descriptor was encoded with a four-byte
-sequence, leading to IllegalArgumentException when calling a proxy method via
-the interface.
-Bug: 192935764
diff --git a/test/183-non-bmp-package-name/jni_find_class.cc b/test/183-non-bmp-package-name/jni_find_class.cc
deleted file mode 100644
index 8a9e967277..0000000000
--- a/test/183-non-bmp-package-name/jni_find_class.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2022 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/logging.h"
-#include "jni.h"
-#include "scoped_thread_state_change-inl.h"
-#include "thread-current-inl.h"
-
-namespace art {
-
-extern "C" JNIEXPORT jclass JNICALL Java_Main_jniFindClass(
- JNIEnv* env, jclass, jstring class_name) {
- CHECK(class_name != nullptr);
- // FIXME: We should test consistency with `env->GetStringUTFChars(...)` here
- // but the JNI uses the wrong encoding. Bug: 238888095
- if ((true)) {
- ScopedObjectAccess soa(Thread::Current());
- std::string name = soa.Decode<mirror::String>(class_name)->ToModifiedUtf8();
- jclass clazz = env->FindClass(name.c_str());
- return clazz;
- } else {
- const char* name = env->GetStringUTFChars(class_name, nullptr);
- CHECK(name != nullptr);
- jclass clazz = env->FindClass(name);
- env->ReleaseStringUTFChars(class_name, name);
- return clazz;
- }
-}
-
-} // namespace art
-
diff --git a/test/183-non-bmp-package-name/src/Main.java b/test/183-non-bmp-package-name/src/Main.java
deleted file mode 100644
index 973da97cb8..0000000000
--- a/test/183-non-bmp-package-name/src/Main.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2022 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import pkg𐀀.PackageTest;
-
-public class Main {
- public static void main(String[] args) {
- System.loadLibrary(args[0]);
-
- PackageTest.main();
- }
-
- public static native Class<?> jniFindClass(String className);
-}
diff --git a/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java b/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java
deleted file mode 100644
index 2b3b968c65..0000000000
--- a/test/183-non-bmp-package-name/src/pkg𐀀/PackageTest.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2022 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package pkg𐀀;
-
-import java.lang.reflect.InvocationHandler;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.lang.reflect.Proxy;
-
-public class PackageTest {
- public static void main() {
- testJniFindClass();
- testClassForName();
- testProxy();
- }
-
- private static void testJniFindClass() {
- try {
- // Cannot import "Main". Use reflection to call `Main.jniFindClass()`.
- Class<?> mainClass = Class.forName("Main");
- Method jniFindClass = mainClass.getMethod("jniFindClass", String.class);
- Class<?> klass = (Class<?>) jniFindClass.invoke(null, "pkg𐀀/PackageTest");
- if (klass != PackageTest.class) {
- System.out.println("Unexpected class: " + klass);
- }
- } catch (InvocationTargetException ite) {
- ite.getCause().printStackTrace(System.out); // Unwrap the cause.
- } catch (Throwable t) {
- t.printStackTrace(System.out);
- }
- }
-
- private static void testClassForName() {
- try {
- Class<?> klass = Class.forName("pkg𐀀.PackageTest");
- if (klass != PackageTest.class) {
- System.out.println("Unexpected class: " + klass);
- }
- } catch (Throwable t) {
- t.printStackTrace(System.out);
- }
- }
-
- private static void testProxy() {
- try {
- InvocationHandler handler = new PackageInvocationHandler();
- Class<?> proxyClass = Proxy.getProxyClass(
- PackageTestInterface.class.getClassLoader(), PackageTestInterface.class);
- Constructor<?> ctor = proxyClass.getConstructor(InvocationHandler.class);
- Object proxy = ctor.newInstance(handler);
- PackageTestInterface asInterface = (PackageTestInterface) proxy;
- asInterface.interfaceMethod();
- } catch (Throwable t) {
- t.printStackTrace(System.out);
- }
- }
-}
-
-interface PackageTestInterface {
- public void interfaceMethod();
-}
-
-class PackageInvocationHandler implements InvocationHandler {
- public Object invoke(Object proxy, Method method, Object[] args) {
- System.out.println("Invoke " + method);
- return null;
- }
-}
diff --git a/test/Android.bp b/test/Android.bp
index 4111009303..c5322cbd72 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -938,7 +938,6 @@ cc_defaults {
"177-visibly-initialized-deadlock/visibly_initialized.cc",
"178-app-image-native-method/native_methods.cc",
"179-nonvirtual-jni/nonvirtual-call.cc",
- "183-non-bmp-package-name/jni_find_class.cc",
"1945-proxy-method-arguments/get_args.cc",
"203-multi-checkpoint/multi_checkpoint.cc",
"305-other-fault-handler/fault_handler.cc",
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 98156f4f2f..da318d9b8c 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1131,7 +1131,6 @@
},
{
"tests": ["178-app-image-native-method",
- "183-non-bmp-package-name",
"530-checker-peel-unroll",
"616-cha-unloading",
"674-hiddenapi",
diff --git a/test/run_test_build.py b/test/run_test_build.py
index fcaf1a54dc..75cd64f061 100755
--- a/test/run_test_build.py
+++ b/test/run_test_build.py
@@ -81,10 +81,7 @@ class BuildTestContext:
self.hiddenapi = functools.partial(self.run, args.hiddenapi.absolute())
# RBE wrapper for some of the tools.
- # RBE does not suport non-BMP characters in paths, so exclude "183-non-bmp-package-name".
- # Bug: 264866780
- if ("RBE_server_address" in os.environ and USE_RBE > (hash(self.test_name) % 100) and
- self.test_name != "183-non-bmp-package-name"):
+ if "RBE_server_address" in os.environ and USE_RBE > (hash(self.test_name) % 100):
self.rbe_exec_root = os.environ.get("RBE_exec_root")
self.rbe_rewrapper = self.android_build_top / "prebuilts/remoteexecution-client/live/rewrapper"
if self.test_name not in RBE_D8_DISABLED_FOR:
@@ -105,7 +102,6 @@ class BuildTestContext:
"SMALI": args.smali.absolute(),
"SOONG_ZIP": args.soong_zip.absolute(),
"TEST_NAME": self.test_name,
- "LANG": "en_US.UTF-8", # Needed to pass UTF-8 command line arguments.
}
def bash(self, cmd):
diff --git a/test/ti-agent/ti_utf.h b/test/ti-agent/ti_utf.h
index a585b6e5c9..15fe22ce5a 100644
--- a/test/ti-agent/ti_utf.h
+++ b/test/ti-agent/ti_utf.h
@@ -169,17 +169,19 @@ inline void ConvertUtf16ToModifiedUtf8(char* utf8_out,
}
// String contains non-ASCII characters.
+ // FIXME: We should not emit 4-byte sequences. Bug: 192935764
auto append = [&](char c) { *utf8_out++ = c; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ false,
+ /*kUse4ByteSequence=*/ true,
/*kReplaceBadSurrogates=*/ false>(utf16_in, char_count, append);
}
inline size_t CountModifiedUtf8BytesInUtf16(const uint16_t* chars, size_t char_count) {
+ // FIXME: We should not emit 4-byte sequences. Bug: 192935764
size_t result = 0;
auto append = [&](char c ATTRIBUTE_UNUSED) { ++result; };
ConvertUtf16ToUtf8</*kUseShortZero=*/ false,
- /*kUse4ByteSequence=*/ false,
+ /*kUse4ByteSequence=*/ true,
/*kReplaceBadSurrogates=*/ false>(chars, char_count, append);
return result;
}
diff --git a/test/utils/regen-test-files b/test/utils/regen-test-files
index 7b43d4ae3d..5b7247997d 100755
--- a/test/utils/regen-test-files
+++ b/test/utils/regen-test-files
@@ -399,9 +399,6 @@ class Generator:
# Skip test with a copy of `sun.misc.Unsafe`.
if os.path.isfile(os.path.join(run_test_path, "src", "sun", "misc", "Unsafe.java")):
return False
- # Ignore test with a non-ascii package name `pkg𐀀`. b/193141629
- if os.path.isdir(os.path.join(run_test_path, "src", "pkg𐀀")):
- return False
# Skip tests with Hidden API specs.
if os.path.isfile(os.path.join(run_test_path, "hiddenapi-flags.csv")):
return False