/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Andreas Gampea14100c2017-04-24 15:09:56 -070017#include "libcore_util_CharsetUtils.h"
18
19#include <string.h>
20
21#include "nativehelper/jni_macros.h"
22
Jeff Hao848f70a2014-01-15 13:49:50 -080023#include "jni_internal.h"
24#include "mirror/string.h"
25#include "mirror/string-inl.h"
Andreas Gampe87583b32017-05-25 11:22:18 -070026#include "native_util.h"
Mathieu Chartier0795f232016-09-27 18:43:30 -070027#include "scoped_fast_native_object_access-inl.h"
Jeff Hao848f70a2014-01-15 13:49:50 -080028#include "ScopedPrimitiveArray.h"
29#include "unicode/utf16.h"
30
Jeff Hao848f70a2014-01-15 13:49:50 -080031
32namespace art {
33
34/**
35 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
36 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
37 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
38 *
39 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
40 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
41 * to the garbage collector (nor hide potentially large allocations from it).
42 *
43 * Because a call to append might require an allocation, it might fail. Callers should always
44 * check the return value of append.
45 */
46class NativeUnsafeByteSequence {
47 public:
48 explicit NativeUnsafeByteSequence(JNIEnv* env)
49 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
50 }
51
52 ~NativeUnsafeByteSequence() {
53 // Release our pointer to the raw array, copying changes back to the Java heap.
54 if (mRawArray != nullptr) {
55 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
56 }
57 }
58
59 bool append(jbyte b) {
60 if (mOffset == mSize && !resize(mSize * 2)) {
61 return false;
62 }
63 mRawArray[mOffset++] = b;
64 return true;
65 }
66
67 bool resize(int newSize) {
68 if (newSize == mSize) {
69 return true;
70 }
71
72 // Allocate a new array.
73 jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
74 if (newJavaArray == nullptr) {
75 return false;
76 }
77 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
78 if (newRawArray == nullptr) {
79 return false;
80 }
81
82 // Copy data out of the old array and then let go of it.
83 // Note that we may be trimming the array.
84 if (mRawArray != nullptr) {
85 memcpy(newRawArray, mRawArray, mOffset);
86 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
87 mEnv->DeleteLocalRef(mJavaArray);
88 }
89
90 // Point ourselves at the new array.
91 mJavaArray = newJavaArray;
92 mRawArray = newRawArray;
93 mSize = newSize;
94 return true;
95 }
96
97 jbyteArray toByteArray() {
98 // Trim any unused space, if necessary.
99 bool okay = resize(mOffset);
100 return okay ? mJavaArray : nullptr;
101 }
102
103 private:
104 JNIEnv* mEnv;
105 jbyteArray mJavaArray;
106 jbyte* mRawArray;
107 jint mSize;
108 jint mOffset;
109
110 // Disallow copy and assignment.
111 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
112 void operator=(const NativeUnsafeByteSequence&);
113};
114
115static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
116 jint length, jcharArray javaChars) {
117 ScopedByteArrayRO bytes(env, javaBytes);
118 if (bytes.get() == nullptr) {
119 return;
120 }
121 ScopedCharArrayRW chars(env, javaChars);
122 if (chars.get() == nullptr) {
123 return;
124 }
125
126 const jbyte* src = &bytes[offset];
127 jchar* dst = &chars[0];
128 static const jchar REPLACEMENT_CHAR = 0xfffd;
129 for (int i = length - 1; i >= 0; --i) {
130 jchar ch = static_cast<jchar>(*src++ & 0xff);
131 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
132 }
133}
134
135static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
136 jint offset, jint length, jcharArray javaChars) {
137 ScopedByteArrayRO bytes(env, javaBytes);
138 if (bytes.get() == nullptr) {
139 return;
140 }
141 ScopedCharArrayRW chars(env, javaChars);
142 if (chars.get() == nullptr) {
143 return;
144 }
145
146 const jbyte* src = &bytes[offset];
147 jchar* dst = &chars[0];
148 for (int i = length - 1; i >= 0; --i) {
149 *dst++ = static_cast<jchar>(*src++ & 0xff);
150 }
151}
152
153/**
154 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
155 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
156 * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
157 */
158static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
159 jchar maxValidChar) {
160 ScopedObjectAccess soa(env);
161 StackHandleScope<1> hs(soa.Self());
Mathieu Chartier0795f232016-09-27 18:43:30 -0700162 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
Andreas Gampefa4333d2017-02-14 11:10:34 -0800163 if (string == nullptr) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800164 return nullptr;
165 }
166
167 jbyteArray javaBytes = env->NewByteArray(length);
168 ScopedByteArrayRW bytes(env, javaBytes);
169 if (bytes.get() == nullptr) {
170 return nullptr;
171 }
172
Jeff Hao848f70a2014-01-15 13:49:50 -0800173 jbyte* dst = &bytes[0];
jessicahandojo3aaa37b2016-07-29 14:46:37 -0700174 for (int i = 0; i < length; ++i) {
175 jchar ch = string->CharAt(offset + i);
Jeff Hao848f70a2014-01-15 13:49:50 -0800176 if (ch > maxValidChar) {
177 ch = '?';
178 }
179 *dst++ = static_cast<jbyte>(ch);
180 }
181
182 return javaBytes;
183}
184
185static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
186 jint length) {
187 return charsToBytes(env, java_string, offset, length, 0x7f);
188}
189
190static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
191 jint offset, jint length) {
192 return charsToBytes(env, java_string, offset, length, 0xff);
193}
194
195static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
196 jint length) {
197 ScopedObjectAccess soa(env);
198 StackHandleScope<1> hs(soa.Self());
Mathieu Chartier0795f232016-09-27 18:43:30 -0700199 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
Andreas Gampefa4333d2017-02-14 11:10:34 -0800200 if (string == nullptr) {
Jeff Hao848f70a2014-01-15 13:49:50 -0800201 return nullptr;
202 }
203
204 NativeUnsafeByteSequence out(env);
205 if (!out.resize(length)) {
206 return nullptr;
207 }
208
209 const int end = offset + length;
210 for (int i = offset; i < end; ++i) {
211 jint ch = string->CharAt(i);
212 if (ch < 0x80) {
213 // One byte.
214 if (!out.append(ch)) {
215 return nullptr;
216 }
217 } else if (ch < 0x800) {
218 // Two bytes.
219 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
220 return nullptr;
221 }
222 } else if (U16_IS_SURROGATE(ch)) {
223 // A supplementary character.
224 jchar high = static_cast<jchar>(ch);
225 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
226 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
227 if (!out.append('?')) {
228 return nullptr;
229 }
230 continue;
231 }
232 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
233 ++i;
234 ch = U16_GET_SUPPLEMENTARY(high, low);
235 // Four bytes.
236 jbyte b1 = (ch >> 18) | 0xf0;
237 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
238 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
239 jbyte b4 = (ch & 0x3f) | 0x80;
240 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
241 return nullptr;
242 }
243 } else {
244 // Three bytes.
245 jbyte b1 = (ch >> 12) | 0xe0;
246 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
247 jbyte b3 = (ch & 0x3f) | 0x80;
248 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
249 return nullptr;
250 }
251 }
252 }
253 return out.toByteArray();
254}
255
256static JNINativeMethod gMethods[] = {
Igor Murashkin3b6f4402017-02-16 16:13:17 -0800257 FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
258 FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
259 FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
260 FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
261 FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
Jeff Hao848f70a2014-01-15 13:49:50 -0800262};
263
264void register_libcore_util_CharsetUtils(JNIEnv* env) {
265 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
266}
267
268} // namespace art