blob: 9086ee932d5019e60ad5e054407c77837621d505 [file] [log] [blame]
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "java_lang_StringFactory.h"
#include "common_throws.h"
#include "handle_scope-inl.h"
#include "jni/jni_internal.h"
#include "mirror/object-inl.h"
#include "mirror/string-alloc-inl.h"
#include "native_util.h"
#include "nativehelper/jni_macros.h"
#include "nativehelper/scoped_local_ref.h"
#include "nativehelper/scoped_primitive_array.h"
#include "scoped_fast_native_object_access-inl.h"
#include "scoped_thread_state_change-inl.h"
namespace art {
// Native implementation of StringFactory.newStringFromBytes(byte[], int, int, int).
//
// Creates a String from `byte_count` bytes of `java_data` beginning at `offset`. `high` is
// forwarded unchanged to mirror::String::AllocFromByteArray (presumably the high byte
// combined with every source byte -- confirm against that function).
//
// Raises NullPointerException if `java_data` is null and StringIndexOutOfBoundsException if
// `offset`/`byte_count` do not describe a range inside the array; nullptr is returned in
// both cases.
static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
                                                jint high, jint offset, jint byte_count) {
  ScopedFastNativeObjectAccess soa(env);
  if (UNLIKELY(java_data == nullptr)) {
    ThrowNullPointerException("data == null");
    return nullptr;
  }

  auto* const self = soa.Self();
  StackHandleScope<1> scope(self);
  Handle<mirror::ByteArray> bytes(scope.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));

  const int32_t array_length = bytes->GetLength();
  // The sign test must come first: it guarantees `offset` is non-negative before the
  // subtraction below is evaluated.
  const bool negative_argument = (offset | byte_count) < 0;
  if (negative_argument || byte_count > array_length - offset) {
    self->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
                             "length=%d; regionStart=%d; regionLength=%d", array_length,
                             offset, byte_count);
    return nullptr;
  }

  const gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
  return soa.AddLocalReference<jstring>(
      mirror::String::AllocFromByteArray(self, byte_count, bytes, offset, high, allocator));
}
// Native implementation of StringFactory.newStringFromChars(int, int, char[]).
//
// Creates a String from `char_count` chars of `java_data` beginning at `offset`.
// Precondition: `java_data` must not be a null reference (only checked in debug builds);
// no range validation is performed here.
static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
                                                jint char_count, jcharArray java_data) {
  DCHECK(java_data != nullptr);
  ScopedFastNativeObjectAccess soa(env);

  auto* const self = soa.Self();
  StackHandleScope<1> scope(self);
  Handle<mirror::CharArray> chars(scope.NewHandle(soa.Decode<mirror::CharArray>(java_data)));

  const gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
  return soa.AddLocalReference<jstring>(
      mirror::String::AllocFromCharArray(self, char_count, chars, offset, allocator));
}
// Native implementation of StringFactory.newStringFromString(String).
//
// Allocates a new String holding a copy of the full contents of `to_copy`.
// Raises NullPointerException and returns nullptr if `to_copy` is null.
static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
  ScopedFastNativeObjectAccess soa(env);
  if (UNLIKELY(to_copy == nullptr)) {
    ThrowNullPointerException("toCopy == null");
    return nullptr;
  }

  auto* const self = soa.Self();
  StackHandleScope<1> scope(self);
  Handle<mirror::String> source(scope.NewHandle(soa.Decode<mirror::String>(to_copy)));

  const gc::AllocatorType allocator = Runtime::Current()->GetHeap()->GetCurrentAllocator();
  // Copy the whole source string: full length, starting at offset 0.
  return soa.AddLocalReference<jstring>(
      mirror::String::AllocFromString(self,
                                      source->GetLength(),
                                      source,
                                      /*offset=*/ 0,
                                      allocator));
}
static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
jint offset, jint byte_count) {
// Local Define in here
static const jchar kReplacementChar = 0xfffd;
static const int kDefaultBufferSize = 256;
static const int kTableUtf8Needed[] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef
3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
};
ScopedFastNativeObjectAccess soa(env);
if (UNLIKELY(java_data == nullptr)) {
ThrowNullPointerException("data == null");
return nullptr;
}
StackHandleScope<1> hs(soa.Self());
Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
int32_t data_size = byte_array->GetLength();
if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
"length=%d; regionStart=%d; regionLength=%d", data_size,
offset, byte_count);
return nullptr;
}
/*
* This code converts a UTF-8 byte sequence to a Java String (UTF-16).
* It implements the W3C recommended UTF-8 decoder.
* https://www.w3.org/TR/encoding/#utf-8-decoder
*
* Unicode 3.2 Well-Formed UTF-8 Byte Sequences
* Code Points First Second Third Fourth
* U+0000..U+007F 00..7F
* U+0080..U+07FF C2..DF 80..BF
* U+0800..U+0FFF E0 A0..BF 80..BF
* U+1000..U+CFFF E1..EC 80..BF 80..BF
* U+D000..U+D7FF ED 80..9F 80..BF
* U+E000..U+FFFF EE..EF 80..BF 80..BF
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
*
* Please refer to Unicode as the authority.
* p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
*
* Handling Malformed Input
* The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
* the longest code unit subsequence starting at an unconvertible offset that is either
* 1) the initial subsequence of a well-formed code unit sequence, or
* 2) a subsequence of length one:
* One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
* of a valid sequence, and with the conversion to restart after the incomplete sequence.
*
* For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
* "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
* but "C0" can't be the initial subsequence of any well-formed code unit sequence.
* Thus, the output should be "A\ufffd\ufffdA\ufffdA".
*
* Please refer to section "Best Practices for Using U+FFFD." in
* http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
*/
// Initial value
jchar stack_buffer[kDefaultBufferSize];
std::unique_ptr<jchar[]> allocated_buffer;
jchar* v;
if (byte_count <= kDefaultBufferSize) {
v = stack_buffer;
} else {
allocated_buffer.reset(new jchar[byte_count]);
v = allocated_buffer.get();
}
jbyte* d = byte_array->GetData();
DCHECK(d != nullptr);
int idx = offset;
int last = offset + byte_count;
int s = 0;
int code_point = 0;
int utf8_bytes_seen = 0;
int utf8_bytes_needed = 0;
int lower_bound = 0x80;
int upper_bound = 0xbf;
while (idx < last) {
int b = d[idx++] & 0xff;
if (utf8_bytes_needed == 0) {
if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx
v[s++] = (jchar) b;
continue;
}
if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte
v[s++] = kReplacementChar;
continue;
}
// 11xxxxxx
int tableLookupIndex = b & 0x3f;
utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
if (utf8_bytes_needed == 0) {
v[s++] = kReplacementChar;
continue;
}
// utf8_bytes_needed
// 1: b & 0x1f
// 2: b & 0x0f
// 3: b & 0x07
code_point = b & (0x3f >> utf8_bytes_needed);
if (b == 0xe0) {
lower_bound = 0xa0;
} else if (b == 0xed) {
upper_bound = 0x9f;
} else if (b == 0xf0) {
lower_bound = 0x90;
} else if (b == 0xf4) {
upper_bound = 0x8f;
}
} else {
if (b < lower_bound || b > upper_bound) {
// The bytes seen are ill-formed. Substitute them with U+FFFD
v[s++] = kReplacementChar;
code_point = 0;
utf8_bytes_needed = 0;
utf8_bytes_seen = 0;
lower_bound = 0x80;
upper_bound = 0xbf;
/*
* According to the Unicode Standard,
* "a UTF-8 conversion process is required to never consume well-formed
* subsequences as part of its error handling for ill-formed subsequences"
* The current byte could be part of well-formed subsequences. Reduce the
* index by 1 to parse it in next loop.
*/
idx--;
continue;
}
lower_bound = 0x80;
upper_bound = 0xbf;
code_point = (code_point << 6) | (b & 0x3f);
utf8_bytes_seen++;
if (utf8_bytes_needed != utf8_bytes_seen) {
continue;
}
// Encode chars from U+10000 up as surrogate pairs
if (code_point < 0x10000) {
v[s++] = (jchar) code_point;
} else {
v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
}
utf8_bytes_seen = 0;
utf8_bytes_needed = 0;
code_point = 0;
}
}
// The bytes seen are ill-formed. Substitute them by U+FFFD
if (utf8_bytes_needed != 0) {
v[s++] = kReplacementChar;
}
ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
return soa.AddLocalReference<jstring>(result);
}
// Native method table for java.lang.StringFactory. Each entry names a Java method and gives
// its JNI type signature (e.g. "([BII)Ljava/lang/String;" takes byte[], int, int and returns
// String). NOTE(review): FAST_NATIVE_METHOD presumably binds each entry to the matching
// StringFactory_<method> function defined above -- confirm against the macro definition.
static JNINativeMethod gMethods[] = {
FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
};
// Registers this file's native methods for the Java class "java/lang/StringFactory".
// NOTE(review): REGISTER_NATIVE_METHODS is presumed to pick up `env` and the `gMethods`
// table by name -- confirm against the macro definition before renaming either.
void register_java_lang_StringFactory(JNIEnv* env) {
REGISTER_NATIVE_METHODS("java/lang/StringFactory");
}
} // namespace art