Create workflow for mirror::String compression
The characters of all-ASCII Strings are stored in 8-bit blocks
instead of 16-bit ones. Compression is not enabled yet, but the whole
workflow is already in the code (setting kUseStringCompression in
string.h to true enables the feature).
Note: the feature works with the interpreter only, not with the optimizing compiler.
Test art: m ART_TEST_INTERPRETER=true ART_TEST_OPTIMIZING=false
test-art-host
Also tested with String tests from libcore/:
1. libcore.java.lang.StringTest
2. libcore.java.lang.StringBufferTest
3. libcore.java.lang.StringBuilderTest
4. libcore.java.lang.OldStringTest
5. libcore.java.lang.OldStringBufferTest
Memory improvement is 33% (String memory of all apps, as a share of the
total memory of all apps, drops from 6.03% to 4.03%), measured on Angler
with Hprof tools.
Bug: 31040547
Change-Id: I9cc92c265ebf1305fc06b5fc33efd83797660cce
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index d3660e5..bc39ea8 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#ifndef ART_RUNTIME_MIRROR_STRING_INL_H_
#define ART_RUNTIME_MIRROR_STRING_INL_H_
@@ -49,6 +48,7 @@
// Avoid AsString as object is not yet in live bitmap or allocation stack.
String* string = down_cast<String*>(obj);
string->SetCount(count_);
+ DCHECK(!string->IsCompressed() || kUseStringCompression);
}
private:
@@ -68,10 +68,19 @@
// Avoid AsString as object is not yet in live bitmap or allocation stack.
String* string = down_cast<String*>(obj);
string->SetCount(count_);
- uint16_t* value = string->GetValue();
+ DCHECK(!string->IsCompressed() || kUseStringCompression);
+ int32_t length = String::GetLengthFromCount(count_);
const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_;
- for (int i = 0; i < count_; i++) {
- value[i] = high_byte_ + (src[i] & 0xFF);
+ if (string->IsCompressed()) {
+ uint8_t* valueCompressed = string->GetValueCompressed();
+ for (int i = 0; i < length; i++) {
+ valueCompressed[i] = (src[i] & 0xFF);
+ }
+ } else {
+ uint16_t* value = string->GetValue();
+ for (int i = 0; i < length; i++) {
+ value[i] = high_byte_ + (src[i] & 0xFF);
+ }
}
}
@@ -96,7 +105,16 @@
String* string = down_cast<String*>(obj);
string->SetCount(count_);
const uint16_t* const src = src_array_->GetData() + offset_;
- memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+ const int32_t length = String::GetLengthFromCount(count_);
+ bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
+ DCHECK(!compressible || kUseStringCompression);
+ if (compressible) {
+ for (int i = 0; i < length; ++i) {
+ string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
+ }
+ } else {
+ memcpy(string->GetValue(), src, length * sizeof(uint16_t));
+ }
}
private:
@@ -118,8 +136,22 @@
// Avoid AsString as object is not yet in live bitmap or allocation stack.
String* string = down_cast<String*>(obj);
string->SetCount(count_);
- const uint16_t* const src = src_string_->GetValue() + offset_;
- memcpy(string->GetValue(), src, count_ * sizeof(uint16_t));
+ const int32_t length = String::GetLengthFromCount(count_);
+ bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
+ DCHECK(!compressible || kUseStringCompression);
+ if (src_string_->IsCompressed()) {
+ const uint8_t* const src = src_string_->GetValueCompressed() + offset_;
+ memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t));
+ } else {
+ const uint16_t* const src = src_string_->GetValue() + offset_;
+ if (compressible) {
+ for (int i = 0; i < length; ++i) {
+ string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
+ }
+ } else {
+ memcpy(string->GetValue(), src, length * sizeof(uint16_t));
+ }
+ }
}
private:
@@ -133,17 +165,38 @@
}
inline uint16_t String::CharAt(int32_t index) {
- int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
+ int32_t count = GetLength();
if (UNLIKELY((index < 0) || (index >= count))) {
ThrowStringIndexOutOfBoundsException(index, count);
return 0;
}
- return GetValue()[index];
+ if (IsCompressed()) {
+ return GetValueCompressed()[index];
+ } else {
+ return GetValue()[index];
+ }
+}
+
+// Scans `chars` from index `start` up to GetLength() for the first unit equal
+// to `ch`. Returns the index of that unit, or -1 when nothing matches.
+template <typename MemoryType>
+int32_t String::FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) {
+  const MemoryType* cursor = chars + start;
+  const MemoryType* const limit = chars + GetLength();
+  for (; cursor < limit; ++cursor) {
+    if (*cursor == ch) {
+      // Distance from the start of the buffer is the matching index.
+      return cursor - chars;
+    }
+  }
+  return -1;
}
template<VerifyObjectFlags kVerifyFlags>
inline size_t String::SizeOf() {
- size_t size = sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>());
+ size_t size = sizeof(String);
+ if (IsCompressed()) {
+ size += (sizeof(uint8_t) * GetLength<kVerifyFlags>());
+ } else {
+ size += (sizeof(uint16_t) * GetLength<kVerifyFlags>());
+ }
// String.equals() intrinsics assume zero-padding up to kObjectAlignment,
// so make sure the zero-padding is actually copied around if GC compaction
// chooses to copy only SizeOf() bytes.
@@ -152,31 +205,35 @@
}
template <bool kIsInstrumented, typename PreFenceVisitor>
-inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type,
+inline String* String::Alloc(Thread* self, int32_t utf16_length_with_flag,
+ gc::AllocatorType allocator_type,
const PreFenceVisitor& pre_fence_visitor) {
constexpr size_t header_size = sizeof(String);
- static_assert(sizeof(utf16_length) <= sizeof(size_t),
+ const bool compressible = kUseStringCompression &&
+ String::GetCompressionFlagFromCount(utf16_length_with_flag);
+ const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t);
+ size_t length = String::GetLengthFromCount(utf16_length_with_flag);
+ static_assert(sizeof(length) <= sizeof(size_t),
"static_cast<size_t>(utf16_length) must not lose bits.");
- size_t length = static_cast<size_t>(utf16_length);
- size_t data_size = sizeof(uint16_t) * length;
+ size_t data_size = block_size * length;
size_t size = header_size + data_size;
// String.equals() intrinsics assume zero-padding up to kObjectAlignment,
// so make sure the allocator clears the padding as well.
// http://b/23528461
size_t alloc_size = RoundUp(size, kObjectAlignment);
- Class* string_class = GetJavaLangString();
+ Class* string_class = GetJavaLangString();
// Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
// Do this by comparing with the maximum length that will _not_ cause an overflow.
- constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t); // Unsigned arithmetic.
- constexpr size_t max_alloc_length = overflow_length - 1u;
+ const size_t overflow_length = (-header_size) / block_size; // Unsigned arithmetic.
+ const size_t max_alloc_length = overflow_length - 1u;
static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment),
"kObjectAlignment must be at least as big as Java char alignment");
- constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t));
+ const size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / block_size);
if (UNLIKELY(length > max_length)) {
self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
PrettyDescriptor(string_class).c_str(),
- utf16_length).c_str());
+ static_cast<int>(length)).c_str());
return nullptr;
}
@@ -187,11 +244,22 @@
}
template <bool kIsInstrumented>
+inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) {
+  // The same zero value is used both as the count written by the visitor and
+  // as the length-with-flag argument handed to Alloc().
+  constexpr int32_t kEmptyCount = 0;
+  SetStringCountVisitor set_empty_count(kEmptyCount);
+  return Alloc<kIsInstrumented>(self, kEmptyCount, allocator_type, set_empty_count);
+}
+
+template <bool kIsInstrumented>
inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length,
Handle<ByteArray> array, int32_t offset,
int32_t high_byte, gc::AllocatorType allocator_type) {
- SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8);
- String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor);
+ const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
+ const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length)
+ && (high_byte == 0);
+ const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(byte_length)
+ : byte_length;
+ SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8);
+ String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return string;
}
@@ -201,16 +269,24 @@
gc::AllocatorType allocator_type) {
// It is a caller error to have a count less than the actual array's size.
DCHECK_GE(array->GetLength(), count);
- SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset);
- String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor);
+ const bool compressible = kUseStringCompression &&
+ String::AllASCII<uint16_t>(array->GetData() + offset, count);
+ const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count;
+ SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset);
+ String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return new_string;
}
template <bool kIsInstrumented>
inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string,
int32_t offset, gc::AllocatorType allocator_type) {
- SetStringCountAndValueVisitorFromString visitor(string_length, string, offset);
- String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor);
+ const bool compressible = kUseStringCompression &&
+ ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset,
+ string_length));
+ const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length)
+ : string_length;
+ SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset);
+ String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
return new_string;
}
@@ -219,11 +295,28 @@
if (UNLIKELY(result == 0)) {
result = ComputeHashCode();
}
- DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
- << ToModifiedUtf8() << " " << result;
+ if (kIsDebugBuild) {
+ if (IsCompressed()) {
+ DCHECK(result != 0 || ComputeUtf16Hash(GetValueCompressed(), GetLength()) == 0)
+ << ToModifiedUtf8() << " " << result;
+ } else {
+ DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0)
+ << ToModifiedUtf8() << " " << result;
+ }
+ }
return result;
}
+// Returns true when every one of the first `length` units in `chars` is 7-bit
+// ASCII (0x00..0x7f). An empty range (length <= 0) yields true.
+template<typename MemoryType>
+bool String::AllASCII(const MemoryType* const chars, const int length) {
+  for (int i = 0; i < length; ++i) {
+    // 0x80 is the first value outside ASCII, so it must be rejected too;
+    // the previous `> 0x80` comparison let it through.
+    if (chars[i] >= 0x80) {
+      return false;
+    }
+  }
+  return true;
+}
+
} // namespace mirror
} // namespace art