Consistent handling of modified UTF-8.
A recent set of patches handled UTF-8 and modified UTF-8 inconsistently;
this change converges all paths on modified UTF-8 so that they match
the DataInput/DataOutput API contract.
New tests verify that underlying raw data is compatible between the
upstream and local implementations.
Bug: 171832118
Test: atest FrameworksCoreTests:android.util.CharsetUtilsTest
Test: atest FrameworksCoreTests:android.util.XmlTest
Test: atest FrameworksCoreTests:android.util.BinaryXmlTest
Test: atest FrameworksCoreTests:com.android.internal.util.FastDataTest
Change-Id: I49423edc867839fb6626cd8bd361abe7bc512633
diff --git a/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java b/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
index 2a538b2..e2c580c 100644
--- a/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
+++ b/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
@@ -70,7 +70,7 @@
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
- CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
+ CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
}
}
@@ -85,7 +85,7 @@
final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
while (state.keepRunning()) {
- CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
+ CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
}
}
}
diff --git a/core/java/android/util/CharsetUtils.java b/core/java/android/util/CharsetUtils.java
index 80c2055..fa14667 100644
--- a/core/java/android/util/CharsetUtils.java
+++ b/core/java/android/util/CharsetUtils.java
@@ -31,34 +31,48 @@
*/
public class CharsetUtils {
/**
- * Attempt to encode the given string as UTF-8 into the destination byte
- * array without making any new allocations.
+ * Attempt to encode the given string as modified UTF-8 into the destination
+ * byte array without making any new allocations.
*
* @param src string value to be encoded
* @param dest destination byte array to encode into
* @param destOff offset into destination where encoding should begin
* @param destLen length of destination
- * @return the number of bytes written to the destination when encoded
- * successfully, otherwise {@code -1} if not large enough
+ * @return non-negative value when encoding succeeded, or negative value
+ * when failed; the magnitude of the value is the number of bytes
+ * required to encode the string.
*/
- public static int toUtf8Bytes(@NonNull String src,
+ public static int toModifiedUtf8Bytes(@NonNull String src,
long dest, int destOff, int destLen) {
- return toUtf8Bytes(src, src.length(), dest, destOff, destLen);
+ return toModifiedUtf8Bytes(src, src.length(), dest, destOff, destLen);
}
/**
- * Attempt to encode the given string as UTF-8 into the destination byte
- * array without making any new allocations.
+ * Attempt to encode the given string as modified UTF-8 into the destination
+ * byte array without making any new allocations.
*
* @param src string value to be encoded
* @param srcLen exact length of string to be encoded
* @param dest destination byte array to encode into
* @param destOff offset into destination where encoding should begin
* @param destLen length of destination
- * @return the number of bytes written to the destination when encoded
- * successfully, otherwise {@code -1} if not large enough
+ * @return non-negative value when encoding succeeded, or negative value
+ * when failed; the magnitude of the value is the number of bytes
+ * required to encode the string.
*/
@FastNative
- private static native int toUtf8Bytes(@NonNull String src, int srcLen,
+ private static native int toModifiedUtf8Bytes(@NonNull String src, int srcLen,
long dest, int destOff, int destLen);
+
+ /**
+ * Attempt to decode a modified UTF-8 string from the source byte array. The
+ * byte at {@code srcOff + srcLen} is briefly overwritten, so must be writable.
+ * @param src source byte array to decode from
+ * @param srcOff offset into source where decoding should begin
+ * @param srcLen length of source that should be decoded
+ * @return the successfully decoded string
+ */
+ @FastNative
+ public static native @NonNull String fromModifiedUtf8Bytes(
+ long src, int srcOff, int srcLen);
}
diff --git a/core/java/com/android/internal/util/FastDataInput.java b/core/java/com/android/internal/util/FastDataInput.java
index 2e8cb47..f8d241b 100644
--- a/core/java/com/android/internal/util/FastDataInput.java
+++ b/core/java/com/android/internal/util/FastDataInput.java
@@ -17,6 +17,9 @@
package com.android.internal.util;
import android.annotation.NonNull;
+import android.util.CharsetUtils;
+
+import dalvik.system.VMRuntime;
import java.io.BufferedInputStream;
import java.io.Closeable;
@@ -25,7 +28,6 @@
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
@@ -39,9 +41,11 @@
public class FastDataInput implements DataInput, Closeable {
private static final int MAX_UNSIGNED_SHORT = 65_535;
+ private final VMRuntime mRuntime;
private final InputStream mIn;
private final byte[] mBuffer;
+ private final long mBufferPtr;
private final int mBufferCap;
private int mBufferPos;
@@ -54,12 +58,14 @@
private String[] mStringRefs = new String[32];
public FastDataInput(@NonNull InputStream in, int bufferSize) {
+ mRuntime = VMRuntime.getRuntime();
mIn = Objects.requireNonNull(in);
if (bufferSize < 8) {
throw new IllegalArgumentException();
}
- mBuffer = new byte[bufferSize];
+ mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
+ mBufferPtr = mRuntime.addressOf(mBuffer);
mBufferCap = mBuffer.length;
}
@@ -123,15 +129,15 @@
// Attempt to read directly from buffer space if there's enough room,
// otherwise fall back to chunking into place
final int len = readUnsignedShort();
- if (mBufferCap >= len) {
+ if (mBufferCap > len) {
if (mBufferLim - mBufferPos < len) fill(len);
- final String res = new String(mBuffer, mBufferPos, len, StandardCharsets.UTF_8);
+ final String res = CharsetUtils.fromModifiedUtf8Bytes(mBufferPtr, mBufferPos, len);
mBufferPos += len;
return res;
} else {
- final byte[] tmp = new byte[len];
- readFully(tmp, 0, tmp.length);
- return new String(tmp, StandardCharsets.UTF_8);
+ final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+ readFully(tmp, 0, len);
+ return CharsetUtils.fromModifiedUtf8Bytes(mRuntime.addressOf(tmp), 0, len);
}
}
diff --git a/core/java/com/android/internal/util/FastDataOutput.java b/core/java/com/android/internal/util/FastDataOutput.java
index 2530501..83d26e1 100644
--- a/core/java/com/android/internal/util/FastDataOutput.java
+++ b/core/java/com/android/internal/util/FastDataOutput.java
@@ -28,7 +28,6 @@
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Objects;
@@ -42,6 +41,7 @@
public class FastDataOutput implements DataOutput, Flushable, Closeable {
private static final int MAX_UNSIGNED_SHORT = 65_535;
+ private final VMRuntime mRuntime;
private final OutputStream mOut;
private final byte[] mBuffer;
@@ -56,13 +56,14 @@
private HashMap<String, Short> mStringRefs = new HashMap<>();
public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
+ mRuntime = VMRuntime.getRuntime();
mOut = Objects.requireNonNull(out);
if (bufferSize < 8) {
throw new IllegalArgumentException();
}
- mBuffer = (byte[]) VMRuntime.getRuntime().newNonMovableArray(byte.class, bufferSize);
- mBufferPtr = VMRuntime.getRuntime().addressOf(mBuffer);
+ mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
+ mBufferPtr = mRuntime.addressOf(mBuffer);
mBufferCap = mBuffer.length;
}
@@ -111,21 +112,28 @@
// Attempt to write directly to buffer space if there's enough room,
// otherwise fall back to chunking into place
if (mBufferCap - mBufferPos < 2 + s.length()) drain();
- final int res = CharsetUtils.toUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
+
+ // Magnitude of this returned value indicates the number of bytes
+ // required to encode the string; sign indicates success/failure
+ int len = CharsetUtils.toModifiedUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
mBufferCap - mBufferPos - 2);
- if (res >= 0) {
- if (res > MAX_UNSIGNED_SHORT) {
- throw new IOException("UTF-8 length too large: " + res);
- }
- writeShort(res);
- mBufferPos += res;
+ if (Math.abs(len) > MAX_UNSIGNED_SHORT) {
+ throw new IOException("Modified UTF-8 length too large: " + len);
+ }
+
+ if (len >= 0) {
+ // Positive value indicates the string was encoded into the buffer
+ // successfully, so we only need to prefix with length
+ writeShort(len);
+ mBufferPos += len;
} else {
- final byte[] tmp = s.getBytes(StandardCharsets.UTF_8);
- if (tmp.length > MAX_UNSIGNED_SHORT) {
- throw new IOException("UTF-8 length too large: " + res);
- }
- writeShort(tmp.length);
- write(tmp, 0, tmp.length);
+ // Negative value indicates buffer was too small and we need to
+ // allocate a temporary buffer for encoding
+ len = -len;
+ final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+ CharsetUtils.toModifiedUtf8Bytes(s, mRuntime.addressOf(tmp), 0, tmp.length);
+ writeShort(len);
+ write(tmp, 0, len);
}
}
diff --git a/core/jni/android_util_CharsetUtils.cpp b/core/jni/android_util_CharsetUtils.cpp
index 3e1d4a7..7ab6e8f2 100644
--- a/core/jni/android_util_CharsetUtils.cpp
+++ b/core/jni/android_util_CharsetUtils.cpp
@@ -19,13 +19,14 @@
namespace android {
-static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
+static jint android_util_CharsetUtils_toModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
jstring src, jint srcLen, jlong dest, jint destOff, jint destLen) {
char *destPtr = reinterpret_cast<char*>(dest);
// Quickly check if destination has plenty of room for worst-case
// 4-bytes-per-char encoded size
- if (destOff >= 0 && destOff + (srcLen * 4) < destLen) {
+ const size_t worstLen = (srcLen * 4);
+ if (destOff >= 0 && destOff + worstLen < destLen) {
env->GetStringUTFRegion(src, 0, srcLen, destPtr + destOff);
return strlen(destPtr + destOff + srcLen) + srcLen;
}
@@ -38,13 +39,29 @@
return encodedLen;
}
- return -1;
+ return -encodedLen;
+}
+
+static jstring android_util_CharsetUtils_fromModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
+ jlong src, jint srcOff, jint srcLen) {
+ char *srcPtr = reinterpret_cast<char*>(src);
+
+ // This is funky, but we need to temporarily swap a null byte so that
+ // JNI knows where the string ends; we'll put it back, we promise
+ char tmp = srcPtr[srcOff + srcLen];
+ srcPtr[srcOff + srcLen] = '\0';
+ jstring res = env->NewStringUTF(srcPtr + srcOff);
+ srcPtr[srcOff + srcLen] = tmp;
+ return res;
}
static const JNINativeMethod methods[] = {
// @FastNative
- {"toUtf8Bytes", "(Ljava/lang/String;IJII)I",
- (void*)android_util_CharsetUtils_toUtf8Bytes},
+ {"toModifiedUtf8Bytes", "(Ljava/lang/String;IJII)I",
+ (void*)android_util_CharsetUtils_toModifiedUtf8Bytes},
+ // @FastNative
+ {"fromModifiedUtf8Bytes", "(JII)Ljava/lang/String;",
+ (void*)android_util_CharsetUtils_fromModifiedUtf8Bytes},
};
int register_android_util_CharsetUtils(JNIEnv *env) {
diff --git a/core/tests/coretests/src/android/util/CharsetUtilsTest.java b/core/tests/coretests/src/android/util/CharsetUtilsTest.java
index 04cb3d7..c295451 100644
--- a/core/tests/coretests/src/android/util/CharsetUtilsTest.java
+++ b/core/tests/coretests/src/android/util/CharsetUtilsTest.java
@@ -40,37 +40,47 @@
}
@Test
- public void testUtf8_Empty() {
- assertEquals(0, CharsetUtils.toUtf8Bytes("", destPtr, 0, dest.length));
+ public void testModifiedUtf8_Empty() {
+ assertEquals(0, CharsetUtils.toModifiedUtf8Bytes("", destPtr, 0, dest.length));
assertEquals("0000000000000000", HexDump.toHexString(dest));
+ assertEquals("", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 0));
}
@Test
- public void testUtf8_Simple() {
- assertEquals(7, CharsetUtils.toUtf8Bytes("example", destPtr, 0, dest.length));
+ public void testModifiedUtf8_Null() {
+ assertEquals(4, CharsetUtils.toModifiedUtf8Bytes("!\0!", destPtr, 0, dest.length));
+ assertEquals("21C0802100000000", HexDump.toHexString(dest));
+ assertEquals("!\0!", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 4));
+ }
+
+ @Test
+ public void testModifiedUtf8_Simple() {
+ assertEquals(7, CharsetUtils.toModifiedUtf8Bytes("example", destPtr, 0, dest.length));
assertEquals("6578616D706C6500", HexDump.toHexString(dest));
+ assertEquals("example", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 7));
}
@Test
- public void testUtf8_Complex() {
- assertEquals(3, CharsetUtils.toUtf8Bytes("☃", destPtr, 4, dest.length));
+ public void testModifiedUtf8_Complex() {
+ assertEquals(3, CharsetUtils.toModifiedUtf8Bytes("☃", destPtr, 4, dest.length));
assertEquals("00000000E2988300", HexDump.toHexString(dest));
+ assertEquals("☃", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 4, 3));
}
@Test
- public void testUtf8_Bounds() {
- assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 0, 0));
- assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 0, 2));
- assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, -2, 8));
- assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 6, 8));
- assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 10, 8));
+ public void testModifiedUtf8_Bounds() {
+ assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 0, 0));
+ assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 0, 2));
+ assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, -2, 8));
+ assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 6, 8));
+ assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 10, 8));
}
@Test
- public void testUtf8_Overwrite() {
- assertEquals(5, CharsetUtils.toUtf8Bytes("!!!!!", destPtr, 0, dest.length));
- assertEquals(3, CharsetUtils.toUtf8Bytes("...", destPtr, 0, dest.length));
- assertEquals(1, CharsetUtils.toUtf8Bytes("?", destPtr, 0, dest.length));
+ public void testModifiedUtf8_Overwrite() {
+ assertEquals(5, CharsetUtils.toModifiedUtf8Bytes("!!!!!", destPtr, 0, dest.length));
+ assertEquals(3, CharsetUtils.toModifiedUtf8Bytes("...", destPtr, 0, dest.length));
+ assertEquals(1, CharsetUtils.toModifiedUtf8Bytes("?", destPtr, 0, dest.length));
assertEquals("3F002E0021000000", HexDump.toHexString(dest));
}
}
diff --git a/core/tests/coretests/src/android/util/XmlTest.java b/core/tests/coretests/src/android/util/XmlTest.java
index 2ae9cdf..a30381a 100644
--- a/core/tests/coretests/src/android/util/XmlTest.java
+++ b/core/tests/coretests/src/android/util/XmlTest.java
@@ -64,7 +64,7 @@
*/
private static void doLargeValues(TypedXmlSerializer out, TypedXmlPullParser in)
throws Exception {
- final char[] chars = new char[(1 << 16) - 1];
+ final char[] chars = new char[65_534];
Arrays.fill(chars, '!');
final String string = new String(chars);
diff --git a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
index 841d659..81fb39f 100644
--- a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
+++ b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
@@ -30,6 +30,10 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -83,7 +87,7 @@
@Test
public void testUTF_Bounds() throws Exception {
- final char[] buf = new char[65_535];
+ final char[] buf = new char[65_534];
try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(), BOUNCE_SIZE)) {
// Writing simple string will fit fine
Arrays.fill(buf, '!');
@@ -100,6 +104,61 @@
}
@Test
+ public void testTranscode() throws Exception {
+ // Verify that upstream data can be read by fast
+ {
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final DataOutputStream out = new DataOutputStream(outStream);
+ doTranscodeWrite(out);
+ out.flush();
+
+ final FastDataInput in = new FastDataInput(
+ new ByteArrayInputStream(outStream.toByteArray()), BOUNCE_SIZE);
+ doTransodeRead(in);
+ }
+
+ // Verify that fast data can be read by upstream
+ {
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final FastDataOutput out = new FastDataOutput(outStream, BOUNCE_SIZE);
+ doTranscodeWrite(out);
+ out.flush();
+
+ final DataInputStream in = new DataInputStream(
+ new ByteArrayInputStream(outStream.toByteArray()));
+ doTransodeRead(in);
+ }
+ }
+
+ private static void doTranscodeWrite(DataOutput out) throws IOException {
+ out.writeBoolean(true);
+ out.writeBoolean(false);
+ out.writeByte(1);
+ out.writeShort(2);
+ out.writeInt(4);
+ out.writeUTF("foo\0bar");
+ out.writeUTF(TEST_SHORT_STRING);
+ out.writeUTF(TEST_LONG_STRING);
+ out.writeLong(8L);
+ out.writeFloat(16f);
+ out.writeDouble(32d);
+ }
+
+ private static void doTransodeRead(DataInput in) throws IOException {
+ assertEquals(true, in.readBoolean());
+ assertEquals(false, in.readBoolean());
+ assertEquals(1, in.readByte());
+ assertEquals(2, in.readShort());
+ assertEquals(4, in.readInt());
+ assertEquals("foo\0bar", in.readUTF());
+ assertEquals(TEST_SHORT_STRING, in.readUTF());
+ assertEquals(TEST_LONG_STRING, in.readUTF());
+ assertEquals(8L, in.readLong());
+ assertEquals(16f, in.readFloat(), 0.01);
+ assertEquals(32d, in.readDouble(), 0.01);
+ }
+
+ @Test
public void testBounce_Char() throws Exception {
doBounce((out) -> {
out.writeChar('\0');
@@ -191,7 +250,7 @@
@Test
public void testBounce_UTF_Maximum() throws Exception {
- final char[] expectedBuf = new char[65_535];
+ final char[] expectedBuf = new char[65_534];
Arrays.fill(expectedBuf, '!');
final String expected = new String(expectedBuf);