Consistent handling of modified UTF-8.

A recent set of patches handled UTF-8 and modified UTF-8 inconsistently;
this change converges all paths on modified UTF-8 to match the
DataInput/DataOutput API contract.

New tests verify that underlying raw data is compatible between the
upstream and local implementations.

Bug: 171832118
Test: atest FrameworksCoreTests:android.util.CharsetUtilsTest
Test: atest FrameworksCoreTests:android.util.XmlTest
Test: atest FrameworksCoreTests:android.util.BinaryXmlTest
Test: atest FrameworksCoreTests:com.android.internal.util.FastDataTest
Change-Id: I49423edc867839fb6626cd8bd361abe7bc512633
diff --git a/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java b/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
index 2a538b2..e2c580c 100644
--- a/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
+++ b/apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java
@@ -70,7 +70,7 @@
 
         final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
         while (state.keepRunning()) {
-            CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
+            CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
         }
     }
 
@@ -85,7 +85,7 @@
 
         final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
         while (state.keepRunning()) {
-            CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
+            CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
        }
     }
 }
diff --git a/core/java/android/util/CharsetUtils.java b/core/java/android/util/CharsetUtils.java
index 80c2055..fa14667 100644
--- a/core/java/android/util/CharsetUtils.java
+++ b/core/java/android/util/CharsetUtils.java
@@ -31,34 +31,48 @@
  */
 public class CharsetUtils {
     /**
-     * Attempt to encode the given string as UTF-8 into the destination byte
-     * array without making any new allocations.
+     * Attempt to encode the given string as modified UTF-8 into the destination
+     * byte array without making any new allocations.
      *
      * @param src string value to be encoded
      * @param dest destination byte array to encode into
      * @param destOff offset into destination where encoding should begin
      * @param destLen length of destination
-     * @return the number of bytes written to the destination when encoded
-     *         successfully, otherwise {@code -1} if not large enough
+     * @return non-negative value when encoding succeeded, or negative value
+     *         when failed; the magnitude of the value is the number of bytes
+     *         required to encode the string.
      */
-    public static int toUtf8Bytes(@NonNull String src,
+    public static int toModifiedUtf8Bytes(@NonNull String src,
             long dest, int destOff, int destLen) {
-        return toUtf8Bytes(src, src.length(), dest, destOff, destLen);
+        return toModifiedUtf8Bytes(src, src.length(), dest, destOff, destLen);
     }
 
     /**
-     * Attempt to encode the given string as UTF-8 into the destination byte
-     * array without making any new allocations.
+     * Attempt to encode the given string as modified UTF-8 into the destination
+     * byte array without making any new allocations.
      *
      * @param src string value to be encoded
      * @param srcLen exact length of string to be encoded
      * @param dest destination byte array to encode into
      * @param destOff offset into destination where encoding should begin
      * @param destLen length of destination
-     * @return the number of bytes written to the destination when encoded
-     *         successfully, otherwise {@code -1} if not large enough
+     * @return non-negative value when encoding succeeded, or negative value
+     *         when failed; the magnitude of the value is the number of bytes
+     *         required to encode the string.
      */
     @FastNative
-    private static native int toUtf8Bytes(@NonNull String src, int srcLen,
+    private static native int toModifiedUtf8Bytes(@NonNull String src, int srcLen,
             long dest, int destOff, int destLen);
+
+    /**
+     * Attempt to decode a modified UTF-8 string from the source byte array.
+     *
+     * @param src source byte array to decode from
+     * @param srcOff offset into source where decoding should begin
+     * @param srcLen length of source to decode; src[srcOff + srcLen] must be writable
+     * @return the successfully decoded string
+     */
+    @FastNative
+    public static native @NonNull String fromModifiedUtf8Bytes(
+            long src, int srcOff, int srcLen);
 }
diff --git a/core/java/com/android/internal/util/FastDataInput.java b/core/java/com/android/internal/util/FastDataInput.java
index 2e8cb47..f8d241b 100644
--- a/core/java/com/android/internal/util/FastDataInput.java
+++ b/core/java/com/android/internal/util/FastDataInput.java
@@ -17,6 +17,9 @@
 package com.android.internal.util;
 
 import android.annotation.NonNull;
+import android.util.CharsetUtils;
+
+import dalvik.system.VMRuntime;
 
 import java.io.BufferedInputStream;
 import java.io.Closeable;
@@ -25,7 +28,6 @@
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Objects;
 
@@ -39,9 +41,11 @@
 public class FastDataInput implements DataInput, Closeable {
     private static final int MAX_UNSIGNED_SHORT = 65_535;
 
+    private final VMRuntime mRuntime;
     private final InputStream mIn;
 
     private final byte[] mBuffer;
+    private final long mBufferPtr;
     private final int mBufferCap;
 
     private int mBufferPos;
@@ -54,12 +58,14 @@
     private String[] mStringRefs = new String[32];
 
     public FastDataInput(@NonNull InputStream in, int bufferSize) {
+        mRuntime = VMRuntime.getRuntime();
         mIn = Objects.requireNonNull(in);
         if (bufferSize < 8) {
             throw new IllegalArgumentException();
         }
 
-        mBuffer = new byte[bufferSize];
+        mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
+        mBufferPtr = mRuntime.addressOf(mBuffer);
         mBufferCap = mBuffer.length;
     }
 
@@ -123,15 +129,15 @@
         // Attempt to read directly from buffer space if there's enough room,
         // otherwise fall back to chunking into place
         final int len = readUnsignedShort();
-        if (mBufferCap >= len) {
+        if (mBufferCap > len) {
             if (mBufferLim - mBufferPos < len) fill(len);
-            final String res = new String(mBuffer, mBufferPos, len, StandardCharsets.UTF_8);
+            final String res = CharsetUtils.fromModifiedUtf8Bytes(mBufferPtr, mBufferPos, len);
             mBufferPos += len;
             return res;
         } else {
-            final byte[] tmp = new byte[len];
-            readFully(tmp, 0, tmp.length);
-            return new String(tmp, StandardCharsets.UTF_8);
+            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+            readFully(tmp, 0, len);
+            return CharsetUtils.fromModifiedUtf8Bytes(mRuntime.addressOf(tmp), 0, len);
         }
     }
 
diff --git a/core/java/com/android/internal/util/FastDataOutput.java b/core/java/com/android/internal/util/FastDataOutput.java
index 2530501..83d26e1 100644
--- a/core/java/com/android/internal/util/FastDataOutput.java
+++ b/core/java/com/android/internal/util/FastDataOutput.java
@@ -28,7 +28,6 @@
 import java.io.Flushable;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Objects;
 
@@ -42,6 +41,7 @@
 public class FastDataOutput implements DataOutput, Flushable, Closeable {
     private static final int MAX_UNSIGNED_SHORT = 65_535;
 
+    private final VMRuntime mRuntime;
     private final OutputStream mOut;
 
     private final byte[] mBuffer;
@@ -56,13 +56,14 @@
     private HashMap<String, Short> mStringRefs = new HashMap<>();
 
     public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
+        mRuntime = VMRuntime.getRuntime();
         mOut = Objects.requireNonNull(out);
         if (bufferSize < 8) {
             throw new IllegalArgumentException();
         }
 
-        mBuffer = (byte[]) VMRuntime.getRuntime().newNonMovableArray(byte.class, bufferSize);
-        mBufferPtr = VMRuntime.getRuntime().addressOf(mBuffer);
+        mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
+        mBufferPtr = mRuntime.addressOf(mBuffer);
         mBufferCap = mBuffer.length;
     }
 
@@ -111,21 +112,28 @@
         // Attempt to write directly to buffer space if there's enough room,
         // otherwise fall back to chunking into place
         if (mBufferCap - mBufferPos < 2 + s.length()) drain();
-        final int res = CharsetUtils.toUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
+
+        // Magnitude of this returned value indicates the number of bytes
+        // required to encode the string; sign indicates success/failure
+        int len = CharsetUtils.toModifiedUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
                 mBufferCap - mBufferPos - 2);
-        if (res >= 0) {
-            if (res > MAX_UNSIGNED_SHORT) {
-                throw new IOException("UTF-8 length too large: " + res);
-            }
-            writeShort(res);
-            mBufferPos += res;
+        if (Math.abs(len) > MAX_UNSIGNED_SHORT) {
+            throw new IOException("Modified UTF-8 length too large: " + len);
+        }
+
+        if (len >= 0) {
+            // Non-negative value indicates the string was encoded into the
+            // buffer successfully, so we only need to prefix with length
+            writeShort(len);
+            mBufferPos += len;
         } else {
-            final byte[] tmp = s.getBytes(StandardCharsets.UTF_8);
-            if (tmp.length > MAX_UNSIGNED_SHORT) {
-                throw new IOException("UTF-8 length too large: " + res);
-            }
-            writeShort(tmp.length);
-            write(tmp, 0, tmp.length);
+            // Negative value indicates buffer was too small and we need to
+            // allocate a temporary buffer for encoding
+            len = -len;
+            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+            CharsetUtils.toModifiedUtf8Bytes(s, mRuntime.addressOf(tmp), 0, tmp.length);
+            writeShort(len);
+            write(tmp, 0, len);
         }
     }
 
diff --git a/core/jni/android_util_CharsetUtils.cpp b/core/jni/android_util_CharsetUtils.cpp
index 3e1d4a7..7ab6e8f2 100644
--- a/core/jni/android_util_CharsetUtils.cpp
+++ b/core/jni/android_util_CharsetUtils.cpp
@@ -19,13 +19,14 @@
 
 namespace android {
 
-static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
+static jint android_util_CharsetUtils_toModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
         jstring src, jint srcLen, jlong dest, jint destOff, jint destLen) {
     char *destPtr = reinterpret_cast<char*>(dest);
 
     // Quickly check if destination has plenty of room for worst-case
     // 4-bytes-per-char encoded size
-    if (destOff >= 0 && destOff + (srcLen * 4) < destLen) {
+    const size_t worstLen = (srcLen * 4);
+    if (destOff >= 0 && destOff + worstLen < destLen) {
         env->GetStringUTFRegion(src, 0, srcLen, destPtr + destOff);
         return strlen(destPtr + destOff + srcLen) + srcLen;
     }
@@ -38,13 +39,29 @@
         return encodedLen;
     }
 
-    return -1;
+    return -encodedLen;
+}
+
+static jstring android_util_CharsetUtils_fromModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
+        jlong src, jint srcOff, jint srcLen) {
+    char *srcPtr = reinterpret_cast<char*>(src);
+
+    // NewStringUTF() expects a NUL-terminated buffer, so temporarily overwrite the
+    // byte just past the requested range with '\0' and restore it before returning
+    char tmp = srcPtr[srcOff + srcLen];
+    srcPtr[srcOff + srcLen] = '\0';
+    jstring res = env->NewStringUTF(srcPtr + srcOff);
+    srcPtr[srcOff + srcLen] = tmp;
+    return res;
 }
 
 static const JNINativeMethod methods[] = {
     // @FastNative
-    {"toUtf8Bytes",      "(Ljava/lang/String;IJII)I",
-            (void*)android_util_CharsetUtils_toUtf8Bytes},
+    {"toModifiedUtf8Bytes",      "(Ljava/lang/String;IJII)I",
+            (void*)android_util_CharsetUtils_toModifiedUtf8Bytes},
+    // @FastNative
+    {"fromModifiedUtf8Bytes",    "(JII)Ljava/lang/String;",
+            (void*)android_util_CharsetUtils_fromModifiedUtf8Bytes},
 };
 
 int register_android_util_CharsetUtils(JNIEnv *env) {
diff --git a/core/tests/coretests/src/android/util/CharsetUtilsTest.java b/core/tests/coretests/src/android/util/CharsetUtilsTest.java
index 04cb3d7..c295451 100644
--- a/core/tests/coretests/src/android/util/CharsetUtilsTest.java
+++ b/core/tests/coretests/src/android/util/CharsetUtilsTest.java
@@ -40,37 +40,47 @@
     }
 
     @Test
-    public void testUtf8_Empty() {
-        assertEquals(0, CharsetUtils.toUtf8Bytes("", destPtr, 0, dest.length));
+    public void testModifiedUtf8_Empty() {
+        assertEquals(0, CharsetUtils.toModifiedUtf8Bytes("", destPtr, 0, dest.length));
         assertEquals("0000000000000000", HexDump.toHexString(dest));
+        assertEquals("", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 0));
     }
 
     @Test
-    public void testUtf8_Simple() {
-        assertEquals(7, CharsetUtils.toUtf8Bytes("example", destPtr, 0, dest.length));
+    public void testModifiedUtf8_Null() {
+        assertEquals(4, CharsetUtils.toModifiedUtf8Bytes("!\0!", destPtr, 0, dest.length));
+        assertEquals("21C0802100000000", HexDump.toHexString(dest));
+        assertEquals("!\0!", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 4));
+    }
+
+    @Test
+    public void testModifiedUtf8_Simple() {
+        assertEquals(7, CharsetUtils.toModifiedUtf8Bytes("example", destPtr, 0, dest.length));
         assertEquals("6578616D706C6500", HexDump.toHexString(dest));
+        assertEquals("example", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 0, 7));
     }
 
     @Test
-    public void testUtf8_Complex() {
-        assertEquals(3, CharsetUtils.toUtf8Bytes("☃", destPtr, 4, dest.length));
+    public void testModifiedUtf8_Complex() {
+        assertEquals(3, CharsetUtils.toModifiedUtf8Bytes("☃", destPtr, 4, dest.length));
         assertEquals("00000000E2988300", HexDump.toHexString(dest));
+        assertEquals("☃", CharsetUtils.fromModifiedUtf8Bytes(destPtr, 4, 3));
     }
 
     @Test
-    public void testUtf8_Bounds() {
-        assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 0, 0));
-        assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 0, 2));
-        assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, -2, 8));
-        assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 6, 8));
-        assertEquals(-1, CharsetUtils.toUtf8Bytes("foo", destPtr, 10, 8));
+    public void testModifiedUtf8_Bounds() {
+        assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 0, 0));
+        assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 0, 2));
+        assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, -2, 8));
+        assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 6, 8));
+        assertEquals(-3, CharsetUtils.toModifiedUtf8Bytes("foo", destPtr, 10, 8));
     }
 
     @Test
-    public void testUtf8_Overwrite() {
-        assertEquals(5, CharsetUtils.toUtf8Bytes("!!!!!", destPtr, 0, dest.length));
-        assertEquals(3, CharsetUtils.toUtf8Bytes("...", destPtr, 0, dest.length));
-        assertEquals(1, CharsetUtils.toUtf8Bytes("?", destPtr, 0, dest.length));
+    public void testModifiedUtf8_Overwrite() {
+        assertEquals(5, CharsetUtils.toModifiedUtf8Bytes("!!!!!", destPtr, 0, dest.length));
+        assertEquals(3, CharsetUtils.toModifiedUtf8Bytes("...", destPtr, 0, dest.length));
+        assertEquals(1, CharsetUtils.toModifiedUtf8Bytes("?", destPtr, 0, dest.length));
         assertEquals("3F002E0021000000", HexDump.toHexString(dest));
     }
 }
diff --git a/core/tests/coretests/src/android/util/XmlTest.java b/core/tests/coretests/src/android/util/XmlTest.java
index 2ae9cdf..a30381a 100644
--- a/core/tests/coretests/src/android/util/XmlTest.java
+++ b/core/tests/coretests/src/android/util/XmlTest.java
@@ -64,7 +64,7 @@
      */
     private static void doLargeValues(TypedXmlSerializer out, TypedXmlPullParser in)
             throws Exception {
-        final char[] chars = new char[(1 << 16) - 1];
+        final char[] chars = new char[65_534];
         Arrays.fill(chars, '!');
 
         final String string = new String(chars);
diff --git a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
index 841d659..81fb39f 100644
--- a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
+++ b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
@@ -30,6 +30,10 @@
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -83,7 +87,7 @@
 
     @Test
     public void testUTF_Bounds() throws Exception {
-        final char[] buf = new char[65_535];
+        final char[] buf = new char[65_534];
         try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(), BOUNCE_SIZE)) {
             // Writing simple string will fit fine
             Arrays.fill(buf, '!');
@@ -100,6 +104,61 @@
     }
 
     @Test
+    public void testTranscode() throws Exception {
+        // Verify that data written by upstream DataOutputStream can be read by FastDataInput
+        {
+            final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+            final DataOutputStream out = new DataOutputStream(outStream);
+            doTranscodeWrite(out);
+            out.flush();
+
+            final FastDataInput in = new FastDataInput(
+                    new ByteArrayInputStream(outStream.toByteArray()), BOUNCE_SIZE);
+            doTranscodeRead(in);
+        }
+
+        // Verify that data written by FastDataOutput can be read by upstream DataInputStream
+        {
+            final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+            final FastDataOutput out = new FastDataOutput(outStream, BOUNCE_SIZE);
+            doTranscodeWrite(out);
+            out.flush();
+
+            final DataInputStream in = new DataInputStream(
+                    new ByteArrayInputStream(outStream.toByteArray()));
+            doTranscodeRead(in);
+        }
+    }
+
+    private static void doTranscodeWrite(DataOutput out) throws IOException {
+        out.writeBoolean(true);
+        out.writeBoolean(false);
+        out.writeByte(1);
+        out.writeShort(2);
+        out.writeInt(4);
+        out.writeUTF("foo\0bar");
+        out.writeUTF(TEST_SHORT_STRING);
+        out.writeUTF(TEST_LONG_STRING);
+        out.writeLong(8L);
+        out.writeFloat(16f);
+        out.writeDouble(32d);
+    }
+
+    private static void doTranscodeRead(DataInput in) throws IOException {
+        assertEquals(true, in.readBoolean());
+        assertEquals(false, in.readBoolean());
+        assertEquals(1, in.readByte());
+        assertEquals(2, in.readShort());
+        assertEquals(4, in.readInt());
+        assertEquals("foo\0bar", in.readUTF());
+        assertEquals(TEST_SHORT_STRING, in.readUTF());
+        assertEquals(TEST_LONG_STRING, in.readUTF());
+        assertEquals(8L, in.readLong());
+        assertEquals(16f, in.readFloat(), 0.01);
+        assertEquals(32d, in.readDouble(), 0.01);
+    }
+
+    @Test
     public void testBounce_Char() throws Exception {
         doBounce((out) -> {
             out.writeChar('\0');
@@ -191,7 +250,7 @@
 
     @Test
     public void testBounce_UTF_Maximum() throws Exception {
-        final char[] expectedBuf = new char[65_535];
+        final char[] expectedBuf = new char[65_534];
         Arrays.fill(expectedBuf, '!');
         final String expected = new String(expectedBuf);