| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "java_lang_StringFactory.h" |
| |
| #include "common_throws.h" |
| #include "handle_scope-inl.h" |
| #include "jni/jni_internal.h" |
| #include "mirror/object-inl.h" |
| #include "mirror/string-alloc-inl.h" |
| #include "native_util.h" |
| #include "nativehelper/jni_macros.h" |
| #include "nativehelper/scoped_local_ref.h" |
| #include "nativehelper/scoped_primitive_array.h" |
| #include "scoped_fast_native_object_access-inl.h" |
| #include "scoped_thread_state_change-inl.h" |
| |
| namespace art { |
| |
| static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data, |
| jint high, jint offset, jint byte_count) { |
| ScopedFastNativeObjectAccess soa(env); |
| if (UNLIKELY(java_data == nullptr)) { |
| ThrowNullPointerException("data == null"); |
| return nullptr; |
| } |
| StackHandleScope<1> hs(soa.Self()); |
| Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data))); |
| int32_t data_size = byte_array->GetLength(); |
| if ((offset | byte_count) < 0 || byte_count > data_size - offset) { |
| soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", |
| "length=%d; regionStart=%d; regionLength=%d", data_size, |
| offset, byte_count); |
| return nullptr; |
| } |
| gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(), |
| byte_count, |
| byte_array, |
| offset, |
| high, |
| allocator_type); |
| return soa.AddLocalReference<jstring>(result); |
| } |
| |
| // The char array passed as `java_data` must not be a null reference. |
| static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset, |
| jint char_count, jcharArray java_data) { |
| DCHECK(java_data != nullptr); |
| ScopedFastNativeObjectAccess soa(env); |
| StackHandleScope<1> hs(soa.Self()); |
| Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data))); |
| gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(), |
| char_count, |
| char_array, |
| offset, |
| allocator_type); |
| return soa.AddLocalReference<jstring>(result); |
| } |
| |
| static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) { |
| ScopedFastNativeObjectAccess soa(env); |
| if (UNLIKELY(to_copy == nullptr)) { |
| ThrowNullPointerException("toCopy == null"); |
| return nullptr; |
| } |
| StackHandleScope<1> hs(soa.Self()); |
| Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy))); |
| gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); |
| ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(), |
| string->GetLength(), |
| string, |
| /*offset=*/ 0, |
| allocator_type); |
| return soa.AddLocalReference<jstring>(result); |
| } |
| |
| static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data, |
| jint offset, jint byte_count) { |
| // Local Define in here |
| static const jchar kReplacementChar = 0xfffd; |
| static const int kDefaultBufferSize = 256; |
| static const int kTableUtf8Needed[] = { |
| // 0 1 2 3 4 5 6 7 8 9 a b c d e f |
| 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf |
| 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef |
| 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff |
| }; |
| |
| ScopedFastNativeObjectAccess soa(env); |
| if (UNLIKELY(java_data == nullptr)) { |
| ThrowNullPointerException("data == null"); |
| return nullptr; |
| } |
| |
| StackHandleScope<1> hs(soa.Self()); |
| Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data))); |
| int32_t data_size = byte_array->GetLength(); |
| if ((offset | byte_count) < 0 || byte_count > data_size - offset) { |
| soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", |
| "length=%d; regionStart=%d; regionLength=%d", data_size, |
| offset, byte_count); |
| return nullptr; |
| } |
| |
| /* |
| * This code converts a UTF-8 byte sequence to a Java String (UTF-16). |
| * It implements the W3C recommended UTF-8 decoder. |
| * https://www.w3.org/TR/encoding/#utf-8-decoder |
| * |
| * Unicode 3.2 Well-Formed UTF-8 Byte Sequences |
| * Code Points First Second Third Fourth |
| * U+0000..U+007F 00..7F |
| * U+0080..U+07FF C2..DF 80..BF |
| * U+0800..U+0FFF E0 A0..BF 80..BF |
| * U+1000..U+CFFF E1..EC 80..BF 80..BF |
| * U+D000..U+D7FF ED 80..9F 80..BF |
| * U+E000..U+FFFF EE..EF 80..BF 80..BF |
| * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF |
| * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF |
| * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF |
| * |
| * Please refer to Unicode as the authority. |
| * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf |
| * |
| * Handling Malformed Input |
| * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is |
| * the longest code unit subsequence starting at an unconvertible offset that is either |
| * 1) the initial subsequence of a well-formed code unit sequence, or |
| * 2) a subsequence of length one: |
| * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix |
| * of a valid sequence, and with the conversion to restart after the incomplete sequence. |
| * |
| * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are |
| * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80", |
| * but "C0" can't be the initial subsequence of any well-formed code unit sequence. |
| * Thus, the output should be "A\ufffd\ufffdA\ufffdA". |
| * |
| * Please refer to section "Best Practices for Using U+FFFD." in |
| * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf |
| */ |
| |
| // Initial value |
| jchar stack_buffer[kDefaultBufferSize]; |
| std::unique_ptr<jchar[]> allocated_buffer; |
| jchar* v; |
| if (byte_count <= kDefaultBufferSize) { |
| v = stack_buffer; |
| } else { |
| allocated_buffer.reset(new jchar[byte_count]); |
| v = allocated_buffer.get(); |
| } |
| |
| jbyte* d = byte_array->GetData(); |
| DCHECK(d != nullptr); |
| |
| int idx = offset; |
| int last = offset + byte_count; |
| int s = 0; |
| |
| int code_point = 0; |
| int utf8_bytes_seen = 0; |
| int utf8_bytes_needed = 0; |
| int lower_bound = 0x80; |
| int upper_bound = 0xbf; |
| while (idx < last) { |
| int b = d[idx++] & 0xff; |
| if (utf8_bytes_needed == 0) { |
| if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx |
| v[s++] = (jchar) b; |
| continue; |
| } |
| |
| if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte |
| v[s++] = kReplacementChar; |
| continue; |
| } |
| |
| // 11xxxxxx |
| int tableLookupIndex = b & 0x3f; |
| utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex]; |
| if (utf8_bytes_needed == 0) { |
| v[s++] = kReplacementChar; |
| continue; |
| } |
| |
| // utf8_bytes_needed |
| // 1: b & 0x1f |
| // 2: b & 0x0f |
| // 3: b & 0x07 |
| code_point = b & (0x3f >> utf8_bytes_needed); |
| if (b == 0xe0) { |
| lower_bound = 0xa0; |
| } else if (b == 0xed) { |
| upper_bound = 0x9f; |
| } else if (b == 0xf0) { |
| lower_bound = 0x90; |
| } else if (b == 0xf4) { |
| upper_bound = 0x8f; |
| } |
| } else { |
| if (b < lower_bound || b > upper_bound) { |
| // The bytes seen are ill-formed. Substitute them with U+FFFD |
| v[s++] = kReplacementChar; |
| code_point = 0; |
| utf8_bytes_needed = 0; |
| utf8_bytes_seen = 0; |
| lower_bound = 0x80; |
| upper_bound = 0xbf; |
| /* |
| * According to the Unicode Standard, |
| * "a UTF-8 conversion process is required to never consume well-formed |
| * subsequences as part of its error handling for ill-formed subsequences" |
| * The current byte could be part of well-formed subsequences. Reduce the |
| * index by 1 to parse it in next loop. |
| */ |
| idx--; |
| continue; |
| } |
| |
| lower_bound = 0x80; |
| upper_bound = 0xbf; |
| code_point = (code_point << 6) | (b & 0x3f); |
| utf8_bytes_seen++; |
| if (utf8_bytes_needed != utf8_bytes_seen) { |
| continue; |
| } |
| |
| // Encode chars from U+10000 up as surrogate pairs |
| if (code_point < 0x10000) { |
| v[s++] = (jchar) code_point; |
| } else { |
| v[s++] = (jchar) ((code_point >> 10) + 0xd7c0); |
| v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00); |
| } |
| |
| utf8_bytes_seen = 0; |
| utf8_bytes_needed = 0; |
| code_point = 0; |
| } |
| } |
| |
| // The bytes seen are ill-formed. Substitute them by U+FFFD |
| if (utf8_bytes_needed != 0) { |
| v[s++] = kReplacementChar; |
| } |
| |
| ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v); |
| return soa.AddLocalReference<jstring>(result); |
| } |
| |
| static JNINativeMethod gMethods[] = { |
| FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"), |
| FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"), |
| FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"), |
| FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"), |
| }; |
| |
| void register_java_lang_StringFactory(JNIEnv* env) { |
| REGISTER_NATIVE_METHODS("java/lang/StringFactory"); |
| } |
| |
| } // namespace art |