String compression on intrinsics x86 and x86_64
Changes on intrinsics and Code Generation (x86 and x86_64)
for string compression feature. Currently the feature is off.
The size of boot.oat and boot.art for x86 before and after the
changes (feature OFF) are still. When the feature ON,
boot.oat increased by 0.83% and boot.art decreased by 19.32%.
Meanwhile for x86_64, size of boot.oat and boot.art before and
after changes (feature OFF) are still. When the feature ON,
boot.oat increased by 0.87% and boot.art decreased by 6.59%.
Turn feature on: runtime/mirror/string.h (kUseStringCompression = true)
runtime/asm_support.h (STRING_COMPRESSION_FEATURE 1)
Test: m -j31 test-art-host
All tests passed both when the mirror::kUseStringCompression
is ON and OFF.
The jni_internal_test changed to assert an empty string length
to be equal -(1 << 31) as it is compressed.
Bug: 31040547
Change-Id: Ia447c9b147cabb6a69e6ded86be1fe0c46d9638d
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c300080..a7051ae 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -150,6 +150,9 @@
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<Register>(), array_len);
+ if (mirror::kUseStringCompression) {
+ __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX));
+ }
}
x86_codegen->EmitParallelMoves(
locations->InAt(0),
@@ -5021,7 +5024,23 @@
case Primitive::kPrimChar: {
Register out = out_loc.AsRegister<Register>();
- __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Branch cases into compressed and uncompressed for each index's type.
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ NearLabel done, not_compressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ j(kGreaterEqual, ¬_compressed);
+ __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
+ __ jmp(&done);
+ __ Bind(¬_compressed);
+ __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
+ __ Bind(&done);
+ } else {
+ // Common case for charAt of array of char or when string compression's
+ // feature is turned off.
+ __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
+ }
break;
}
@@ -5359,6 +5378,10 @@
Register out = locations->Out().AsRegister<Register>();
__ movl(out, Address(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out most significant bit in case the array is String's array of char.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ andl(out, Immediate(INT32_MAX));
+ }
}
void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -5372,9 +5395,15 @@
if (!length->IsEmittedAtUseSite()) {
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
}
+ // Need register to see array's length.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
+ const bool is_string_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
LocationSummary* locations = instruction->GetLocations();
Location index_loc = locations->InAt(0);
Location length_loc = locations->InAt(1);
@@ -5409,13 +5438,23 @@
uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<Register>(), len_offset);
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(array_len, Immediate(value));
+ if (is_string_compressed_char_at) {
+ Register length_reg = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(length_reg, array_len);
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
+ __ andl(length_reg, Immediate(INT32_MAX));
+ codegen_->GenerateIntCompare(length_reg, index_loc);
} else {
- __ cmpl(array_len, index_loc.AsRegister<Register>());
+ // Checking bounds for general case:
+ // Array of char or string's array with feature compression off.
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<Register>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
}
- codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
codegen_->GenerateIntCompare(length_loc, index_loc);
}
@@ -7278,13 +7317,17 @@
void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
+ GenerateIntCompare(lhs_reg, rhs);
+}
+
+void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ __ cmpl(lhs, rhs.AsRegister<Register>());
}
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1ae9af3..1bd28da 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -474,6 +474,7 @@
// Compare int values. Supports only register locations for `lhs`.
void GenerateIntCompare(Location lhs, Location rhs);
+ void GenerateIntCompare(Register lhs, Location rhs);
// Construct address for array access.
static Address ArrayAddress(Register obj,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f9a3e42..b243ee0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -198,6 +198,9 @@
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+ if (mirror::kUseStringCompression) {
+ __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX));
+ }
}
// We're moving two locations to locations that could overlap, so we need a parallel
@@ -4485,7 +4488,21 @@
case Primitive::kPrimChar: {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Branch cases into compressed and uncompressed for each index's type.
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ NearLabel done, not_compressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ j(kGreaterEqual, ¬_compressed);
+ __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
+ __ jmp(&done);
+ __ Bind(¬_compressed);
+ __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
+ __ Bind(&done);
+ } else {
+ __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
+ }
break;
}
@@ -4807,6 +4824,10 @@
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
__ movl(out, Address(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out most significant bit in case the array is String's array of char.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ andl(out, Immediate(INT32_MAX));
+ }
}
void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -4856,13 +4877,23 @@
uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
- if (index_loc.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
- __ cmpl(array_len, Immediate(value));
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ CpuRegister length_reg = CpuRegister(TMP);
+ __ movl(length_reg, array_len);
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
+ __ andl(length_reg, Immediate(INT32_MAX));
+ codegen_->GenerateIntCompare(length_reg, index_loc);
} else {
- __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+ // Checking the bound for general case:
+ // Array of char or String's array when the compression feature off.
+ if (index_loc.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+ __ cmpl(array_len, Immediate(value));
+ } else {
+ __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
+ }
+ codegen_->MaybeRecordImplicitNullCheck(array_length);
}
- codegen_->MaybeRecordImplicitNullCheck(array_length);
} else {
codegen_->GenerateIntCompare(length_loc, index_loc);
}
@@ -6525,13 +6556,17 @@
void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
+ GenerateIntCompare(lhs_reg, rhs);
+}
+
+void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<CpuRegister>());
+ __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 594f051..8dec44e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -510,8 +510,9 @@
void Compare32BitValue(CpuRegister dest, int32_t value);
void Compare64BitValue(CpuRegister dest, int64_t value);
- // Compare int values. Supports only register locations for `lhs`.
+ // Compare int values. Supports register locations for `lhs`.
void GenerateIntCompare(Location lhs, Location rhs);
+ void GenerateIntCompare(CpuRegister lhs, Location rhs);
// Compare long values. Supports only register locations for `lhs`.
void GenerateLongCompare(Location lhs, Location rhs);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e61aba0..f41e4d9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1401,23 +1401,39 @@
__ cmpl(str, arg);
__ j(kEqual, &return_true);
- // Load length of receiver string.
+ // Load length and compression flag of receiver string.
__ movl(ecx, Address(str, count_offset));
- // Check if lengths are equal, return false if they're not.
+ // Check if lengths and compression flags are equal, return false if they're not.
+ // Two identical strings will always have same compression style since
+ // compression style is decided on alloc.
__ cmpl(ecx, Address(arg, count_offset));
__ j(kNotEqual, &return_false);
- // Return true if both strings are empty.
- __ jecxz(&return_true);
+ if (mirror::kUseStringCompression) {
+ NearLabel string_uncompressed;
+ // Differ cases into both compressed or both uncompressed. Different compression style
+ // is cut above.
+ __ cmpl(ecx, Immediate(0));
+ __ j(kGreaterEqual, &string_uncompressed);
+ // Divide string length by 2, rounding up, and continue as if uncompressed.
+ // Merge clearing the compression flag (+0x80000000) with +1 for rounding.
+ __ addl(ecx, Immediate(0x80000001));
+ __ shrl(ecx, Immediate(1));
+ __ Bind(&string_uncompressed);
+ }
+ // Return true if strings are empty.
+ __ jecxz(&return_true);
// Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
__ leal(esi, Address(str, value_offset));
__ leal(edi, Address(arg, value_offset));
- // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
+ // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
+ // divisible by 2.
__ addl(ecx, Immediate(1));
__ shrl(ecx, Immediate(1));
- // Assertions that must hold in order to compare strings 2 characters at a time.
+ // Assertions that must hold in order to compare strings 2 characters (uncompressed)
+ // or 4 characters (compressed) at a time.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
@@ -1461,6 +1477,10 @@
locations->AddTemp(Location::RegisterLocation(ECX));
// Need another temporary to be able to compute the result.
locations->AddTemp(Location::RequiresRegister());
+ if (mirror::kUseStringCompression) {
+ // Need another temporary to be able to save unflagged string length.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void GenerateStringIndexOf(HInvoke* invoke,
@@ -1478,6 +1498,8 @@
Register counter = locations->GetTemp(0).AsRegister<Register>();
Register string_length = locations->GetTemp(1).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
+ // Only used when string compression feature is on.
+ Register string_length_flagged;
// Check our assumptions for registers.
DCHECK_EQ(string_obj, EDI);
@@ -1515,6 +1537,12 @@
// Load string length, i.e., the count field of the string.
__ movl(string_length, Address(string_obj, count_offset));
+ if (mirror::kUseStringCompression) {
+ string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
+ __ movl(string_length_flagged, string_length);
+ // Mask out first bit used as compression flag.
+ __ andl(string_length, Immediate(INT32_MAX));
+ }
// Do a zero-length check.
// TODO: Support jecxz.
@@ -1540,20 +1568,50 @@
__ cmpl(start_index, Immediate(0));
__ cmovl(kGreater, counter, start_index);
- // Move to the start of the string: string_obj + value_offset + 2 * start_index.
- __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ if (mirror::kUseStringCompression) {
+ NearLabel modify_counter, offset_uncompressed_label;
+ __ cmpl(string_length_flagged, Immediate(0));
+ __ j(kGreaterEqual, &offset_uncompressed_label);
+ // Move to the start of the string: string_obj + value_offset + start_index.
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
+ __ jmp(&modify_counter);
- // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
- // compare.
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ Bind(&offset_uncompressed_label);
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
+ // compare.
+ __ Bind(&modify_counter);
+ } else {
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ }
__ negl(counter);
__ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
}
- // Everything is set up for repne scasw:
- // * Comparison address in EDI.
- // * Counter in ECX.
- __ repne_scasw();
+ if (mirror::kUseStringCompression) {
+ NearLabel uncompressed_string_comparison;
+ NearLabel comparison_done;
+ __ cmpl(string_length_flagged, Immediate(0));
+ __ j(kGreater, &uncompressed_string_comparison);
+ // Check if EAX (search_value) is ASCII.
+ __ cmpl(search_value, Immediate(127));
+ __ j(kGreater, ¬_found_label);
+ // Comparing byte-per-byte.
+ __ repne_scasb();
+ __ jmp(&comparison_done);
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in EDI.
+ // * Counter in ECX.
+ __ Bind(&uncompressed_string_comparison);
+ __ repne_scasw();
+ __ Bind(&comparison_done);
+ } else {
+ __ repne_scasw();
+ }
// Did we find a match?
__ j(kNotEqual, ¬_found_label);
@@ -1706,38 +1764,64 @@
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
- // Compute the address of the destination buffer.
- __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
-
- // Compute the address of the source string.
- if (srcBegin.IsConstant()) {
- // Compute the address of the source string by adding the number of chars from
- // the source beginning to the value offset of a string.
- __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset));
- } else {
- __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(),
- ScaleFactor::TIMES_2, value_offset));
- }
-
// Compute the number of chars (words) to move.
- // Now is the time to save ECX, since we don't know if it will be used later.
+ // Save ECX, since we don't know if it will be used later.
__ pushl(ECX);
int stack_adjust = kX86WordSize;
__ cfi().AdjustCFAOffset(stack_adjust);
DCHECK_EQ(srcEnd, ECX);
if (srcBegin.IsConstant()) {
- if (srcBegin_value != 0) {
- __ subl(ECX, Immediate(srcBegin_value));
- }
+ __ subl(ECX, Immediate(srcBegin_value));
} else {
DCHECK(srcBegin.IsRegister());
__ subl(ECX, srcBegin.AsRegister<Register>());
}
- // Do the move.
+ NearLabel done;
+ if (mirror::kUseStringCompression) {
+ // Location of count in string
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ __ pushl(EAX);
+ __ cfi().AdjustCFAOffset(stack_adjust);
+
+ NearLabel copy_loop, copy_uncompressed;
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ __ j(kGreaterEqual, ©_uncompressed);
+ // Compute the address of the source string by adding the number of chars from
+ // the source beginning to the value offset of a string.
+ __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
+
+ // Start the loop to copy String's value to Array of Char.
+ __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+ __ Bind(©_loop);
+ __ jecxz(&done);
+ // Use EAX temporary (convert byte from ESI to word).
+ // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
+ __ movzxb(EAX, Address(ESI, 0));
+ __ movw(Address(EDI, 0), EAX);
+ __ leal(EDI, Address(EDI, char_size));
+ __ leal(ESI, Address(ESI, c_char_size));
+ // TODO: Add support for LOOP to X86Assembler.
+ __ subl(ECX, Immediate(1));
+ __ jmp(©_loop);
+ __ Bind(©_uncompressed);
+ }
+
+ // Do the copy for uncompressed string.
+ // Compute the address of the destination buffer.
+ __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+ __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
__ rep_movsw();
- // And restore ECX.
+ __ Bind(&done);
+ if (mirror::kUseStringCompression) {
+ // Restore EAX.
+ __ popl(EAX);
+ __ cfi().AdjustCFAOffset(-stack_adjust);
+ }
+ // Restore ECX.
__ popl(ECX);
__ cfi().AdjustCFAOffset(-stack_adjust);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 0f31fab..4b0afca 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1568,14 +1568,27 @@
__ cmpl(str, arg);
__ j(kEqual, &return_true);
- // Load length of receiver string.
+ // Load length and compression flag of receiver string.
__ movl(rcx, Address(str, count_offset));
- // Check if lengths are equal, return false if they're not.
+ // Check if lengths and compressiond flags are equal, return false if they're not.
+ // Two identical strings will always have same compression style since
+ // compression style is decided on alloc.
__ cmpl(rcx, Address(arg, count_offset));
__ j(kNotEqual, &return_false);
+
+ if (mirror::kUseStringCompression) {
+ NearLabel string_uncompressed;
+ // Both string are compressed.
+ __ cmpl(rcx, Immediate(0));
+ __ j(kGreaterEqual, &string_uncompressed);
+ // Divide string length by 2, rounding up, and continue as if uncompressed.
+ // Merge clearing the compression flag with +1 for rounding.
+ __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001)));
+ __ shrl(rcx, Immediate(1));
+ __ Bind(&string_uncompressed);
+ }
// Return true if both strings are empty.
__ jrcxz(&return_true);
-
// Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
__ leal(rsi, Address(str, value_offset));
__ leal(rdi, Address(arg, value_offset));
@@ -1584,7 +1597,8 @@
__ addl(rcx, Immediate(3));
__ shrl(rcx, Immediate(2));
- // Assertions that must hold in order to compare strings 4 characters at a time.
+ // Assertions that must hold in order to compare strings 4 characters (uncompressed)
+ // or 8 characters (compressed) at a time.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
@@ -1674,7 +1688,8 @@
__ j(kAbove, slow_path->GetEntryLabel());
}
- // From here down, we know that we are looking for a char that fits in 16 bits.
+ // From here down, we know that we are looking for a char that fits in
+ // 16 bits (uncompressed) or 8 bits (compressed).
// Location of reference to data array within the String object.
int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count within the String object.
@@ -1682,6 +1697,12 @@
// Load string length, i.e., the count field of the string.
__ movl(string_length, Address(string_obj, count_offset));
+ if (mirror::kUseStringCompression) {
+ // Use TMP to keep string_length_flagged.
+ __ movl(CpuRegister(TMP), string_length);
+ // Mask out first bit used as compression flag.
+ __ andl(string_length, Immediate(INT32_MAX));
+ }
// Do a length check.
// TODO: Support jecxz.
@@ -1692,7 +1713,6 @@
if (start_at_zero) {
// Number of chars to scan is the same as the string length.
__ movl(counter, string_length);
-
// Move to the start of the string.
__ addq(string_obj, Immediate(value_offset));
} else {
@@ -1707,19 +1727,44 @@
__ cmpl(start_index, Immediate(0));
__ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
- // Move to the start of the string: string_obj + value_offset + 2 * start_index.
- __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
-
+ if (mirror::kUseStringCompression) {
+ NearLabel modify_counter, offset_uncompressed_label;
+ __ cmpl(CpuRegister(TMP), Immediate(0));
+ __ j(kGreaterEqual, &offset_uncompressed_label);
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
+ __ jmp(&modify_counter);
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ Bind(&offset_uncompressed_label);
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ __ Bind(&modify_counter);
+ } else {
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+ }
// Now update ecx, the work counter: it's gonna be string.length - start_index.
__ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
__ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
}
- // Everything is set up for repne scasw:
- // * Comparison address in RDI.
- // * Counter in ECX.
- __ repne_scasw();
-
+ if (mirror::kUseStringCompression) {
+ NearLabel uncompressed_string_comparison;
+ NearLabel comparison_done;
+ __ cmpl(CpuRegister(TMP), Immediate(0));
+ __ j(kGreater, &uncompressed_string_comparison);
+ // Check if RAX (search_value) is ASCII.
+ __ cmpl(search_value, Immediate(127));
+ __ j(kGreater, ¬_found_label);
+ // Comparing byte-per-byte.
+ __ repne_scasb();
+ __ jmp(&comparison_done);
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in ECX.
+ __ Bind(&uncompressed_string_comparison);
+ __ repne_scasw();
+ __ Bind(&comparison_done);
+ } else {
+ __ repne_scasw();
+ }
// Did we find a match?
__ j(kNotEqual, ¬_found_label);
@@ -1871,32 +1916,54 @@
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
- // Compute the address of the destination buffer.
- __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
-
- // Compute the address of the source string.
- if (srcBegin.IsConstant()) {
- // Compute the address of the source string by adding the number of chars from
- // the source beginning to the value offset of a string.
- __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset));
- } else {
- __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(),
- ScaleFactor::TIMES_2, value_offset));
- }
-
+ NearLabel done;
// Compute the number of chars (words) to move.
__ movl(CpuRegister(RCX), srcEnd);
if (srcBegin.IsConstant()) {
- if (srcBegin_value != 0) {
- __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
- }
+ __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
} else {
DCHECK(srcBegin.IsRegister());
__ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
}
+ if (mirror::kUseStringCompression) {
+ NearLabel copy_uncompressed, copy_loop;
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ cmpl(Address(obj, count_offset), Immediate(0));
+ __ j(kGreaterEqual, ©_uncompressed);
+ // Compute the address of the source string by adding the number of chars from
+ // the source beginning to the value offset of a string.
+ __ leaq(CpuRegister(RSI),
+ CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
+ // Start the loop to copy String's value to Array of Char.
+ __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
+
+ __ Bind(©_loop);
+ __ jrcxz(&done);
+ // Use TMP as temporary (convert byte from RSI to word).
+ // TODO: Selecting RAX as the temporary and using LODSB/STOSW.
+ __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
+ __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
+ __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
+ __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
+ // TODO: Add support for LOOP to X86_64Assembler.
+ __ subl(CpuRegister(RCX), Immediate(1));
+ __ jmp(©_loop);
+
+ __ Bind(©_uncompressed);
+ }
+
+ __ leaq(CpuRegister(RSI),
+ CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
+ // Compute the address of the destination buffer.
+ __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
// Do the move.
__ rep_movsw();
+
+ __ Bind(&done);
}
static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 879d496..7bb59ef 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1986,11 +1986,70 @@
mov MIRROR_STRING_COUNT_OFFSET(%ecx), %ebx
lea MIRROR_STRING_VALUE_OFFSET(%eax), %esi
lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
+#if (STRING_COMPRESSION_FEATURE)
+ /* Differ cases */
+ cmpl LITERAL(0), %edx
+ jl .Lstring_compareto_this_is_compressed
+ cmpl LITERAL(0), %ebx
+ jl .Lstring_compareto_that_is_compressed
+ jmp .Lstring_compareto_both_not_compressed
+.Lstring_compareto_this_is_compressed:
+ andl LITERAL(0x7FFFFFFF), %edx
+ cmpl LITERAL(0), %ebx
+ jl .Lstring_compareto_both_compressed
+ /* If (this->IsCompressed() && that->IsCompressed() == false) */
+ mov %edx, %eax
+ subl %ebx, %eax
+ mov %edx, %ecx
+ cmovg %ebx, %ecx
+ /* Going into loop to compare each character */
+ jecxz .Lstring_compareto_keep_length // check loop counter (if 0, don't compare)
+.Lstring_compareto_loop_comparison_this_compressed:
+ movzbl (%esi), %edx // move *(this_cur_char) byte to long
+ movzwl (%edi), %ebx // move *(that_cur_char) word to long
+ addl LITERAL(1), %esi // ++this_cur_char (8-bit)
+ addl LITERAL(2), %edi // ++that_cur_char (16-bit)
+ subl %ebx, %edx
+ loope .Lstring_compareto_loop_comparison_this_compressed
+ cmovne %edx, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+ jmp .Lstring_compareto_return
+.Lstring_compareto_that_is_compressed:
+ andl LITERAL(0x7FFFFFFF), %ebx
+ mov %edx, %eax
+ subl %ebx, %eax
+ mov %edx, %ecx
+ cmovg %ebx, %ecx
+ /* If (this->IsCompressed() == false && that->IsCompressed()) */
+ jecxz .Lstring_compareto_keep_length // check loop counter, if 0, don't compare
+.Lstring_compareto_loop_comparison_that_compressed:
+ movzwl (%esi), %edx // move *(this_cur_char) word to long
+ movzbl (%edi), %ebx // move *(that_cur_char) byte to long
+ addl LITERAL(2), %esi // ++this_cur_char (16-bit)
+ addl LITERAL(1), %edi // ++that_cur_char (8-bit)
+ subl %ebx, %edx
+ loope .Lstring_compareto_loop_comparison_that_compressed
+ cmovne %edx, %eax
+ jmp .Lstring_compareto_return // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_both_compressed:
+ andl LITERAL(0x7FFFFFFF), %ebx
/* Calculate min length and count diff */
- mov %edx, %ecx
- mov %edx, %eax
- subl %ebx, %eax
- cmovg %ebx, %ecx
+ mov %edx, %ecx
+ mov %edx, %eax
+ subl %ebx, %eax
+ cmovg %ebx, %ecx
+ jecxz .Lstring_compareto_keep_length
+ repe cmpsb
+ je .Lstring_compareto_keep_length
+ movzbl -1(%esi), %eax // get last compared char from this string (8-bit)
+ movzbl -1(%edi), %ecx // get last compared char from comp string (8-bit)
+ jmp .Lstring_compareto_count_difference
+#endif // STRING_COMPRESSION_FEATURE
+.Lstring_compareto_both_not_compressed:
+ /* Calculate min length and count diff */
+ mov %edx, %ecx
+ mov %edx, %eax
+ subl %ebx, %eax
+ cmovg %ebx, %ecx
/*
* At this point we have:
* eax: value to return if first part of strings are equal
@@ -1998,18 +2057,15 @@
* esi: pointer to this string data
* edi: pointer to comp string data
*/
- jecxz .Lkeep_length
- repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
- jne .Lnot_equal
-.Lkeep_length:
- POP edi // pop callee save reg
- POP esi // pop callee save reg
- ret
- .balign 16
-.Lnot_equal:
- movzwl -2(%esi), %eax // get last compared char from this string
- movzwl -2(%edi), %ecx // get last compared char from comp string
- subl %ecx, %eax // return the difference
+ jecxz .Lstring_compareto_keep_length
+ repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
+ je .Lstring_compareto_keep_length
+ movzwl -2(%esi), %eax // get last compared char from this string (16-bit)
+ movzwl -2(%edi), %ecx // get last compared char from comp string (16-bit)
+.Lstring_compareto_count_difference:
+ subl %ecx, %eax
+.Lstring_compareto_keep_length:
+.Lstring_compareto_return:
POP edi // pop callee save reg
POP esi // pop callee save reg
ret
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index a11e402..54e52e5 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2100,11 +2100,70 @@
/* Build pointers to the start of string data */
leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
+#if (STRING_COMPRESSION_FEATURE)
+ /* Differ cases */
+ cmpl LITERAL(0), %r8d
+ jl .Lstring_compareto_this_is_compressed
+ cmpl LITERAL(0), %r9d
+ jl .Lstring_compareto_that_is_compressed
+ jmp .Lstring_compareto_both_not_compressed
+.Lstring_compareto_this_is_compressed:
+ andl LITERAL(0x7FFFFFFF), %r8d
+ cmpl LITERAL(0), %r9d
+ jl .Lstring_compareto_both_compressed
+ /* Comparison this (8-bit) and that (16-bit) */
+ mov %r8d, %eax
+ subl %r9d, %eax
+ mov %r8d, %ecx
+ cmovg %r9d, %ecx
+ /* Going into loop to compare each character */
+ jecxz .Lstring_compareto_keep_length // check loop counter (if 0 then stop)
+.Lstring_compareto_loop_comparison_this_compressed:
+ movzbl (%edi), %r8d // move *(this_cur_char) byte to long
+ movzwl (%esi), %r9d // move *(that_cur_char) word to long
+ addl LITERAL(1), %edi // ++this_cur_char (8-bit)
+ addl LITERAL(2), %esi // ++that_cur_char (16-bit)
+ subl %r9d, %r8d
+ loope .Lstring_compareto_loop_comparison_this_compressed
+ cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+ ret
+.Lstring_compareto_that_is_compressed:
+ andl LITERAL(0x7FFFFFFF), %r9d
+ movl %r8d, %eax
+ subl %r9d, %eax
+ mov %r8d, %ecx
+ cmovg %r9d, %ecx
+ /* Comparison this (8-bit) and that (16-bit) */
+ jecxz .Lstring_compareto_keep_length // check loop counter (if 0, don't compare)
+.Lstring_compareto_loop_comparison_that_compressed:
+ movzwl (%edi), %r8d // move *(this_cur_char) word to long
+ movzbl (%esi), %r9d // move *(that_cur_chat) byte to long
+ addl LITERAL(2), %edi // ++this_cur_char (16-bit)
+ addl LITERAL(1), %esi // ++that_cur_char (8-bit)
+ subl %r9d, %r8d
+ loope .Lstring_compareto_loop_comparison_that_compressed
+ cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+ ret
+.Lstring_compareto_both_compressed:
+ andl LITERAL(0x7FFFFFFF), %r9d
/* Calculate min length and count diff */
- movl %r8d, %ecx
- movl %r8d, %eax
- subl %r9d, %eax
- cmovg %r9d, %ecx
+ movl %r8d, %ecx
+ movl %r8d, %eax
+ subl %r9d, %eax
+ cmovg %r9d, %ecx
+ jecxz .Lstring_compareto_keep_length
+ repe cmpsb
+ je .Lstring_compareto_keep_length
+ movzbl -1(%edi), %eax // get last compared char from this string (8-bit)
+ movzbl -1(%esi), %ecx // get last compared char from comp string (8-bit)
+ jmp .Lstring_compareto_count_difference
+#endif // STRING_COMPRESSION_FEATURE
+.Lstring_compareto_both_not_compressed:
+ /* Calculate min length and count diff */
+ movl %r8d, %ecx
+ movl %r8d, %eax
+ subl %r9d, %eax
+ cmovg %r9d, %ecx
/*
* At this point we have:
* eax: value to return if first part of strings are equal
@@ -2112,16 +2171,14 @@
* esi: pointer to comp string data
* edi: pointer to this string data
*/
- jecxz .Lkeep_length
- repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
- jne .Lnot_equal
-.Lkeep_length:
- ret
- .balign 16
-.Lnot_equal:
- movzwl -2(%edi), %eax // get last compared char from this string
- movzwl -2(%esi), %ecx // get last compared char from comp string
+ jecxz .Lstring_compareto_keep_length
+ repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
+ je .Lstring_compareto_keep_length
+ movzwl -2(%edi), %eax // get last compared char from this string (16-bit)
+ movzwl -2(%esi), %ecx // get last compared char from comp string (16-bit)
+.Lstring_compareto_count_difference:
subl %ecx, %eax // return the difference
+.Lstring_compareto_keep_length:
ret
END_FUNCTION art_quick_string_compareto
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index b8f7272..567791e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -240,7 +240,9 @@
#define MIRROR_STRING_VALUE_OFFSET (8 + MIRROR_OBJECT_HEADER_SIZE)
ADD_TEST_EQ(MIRROR_STRING_VALUE_OFFSET, art::mirror::String::ValueOffset().Int32Value())
-
+// String compression feature.
+#define STRING_COMPRESSION_FEATURE 0
+ADD_TEST_EQ(STRING_COMPRESSION_FEATURE, art::mirror::kUseStringCompression);
#if defined(__cplusplus)
} // End of CheckAsmSupportOffsets.
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index fe0081c..4c37b0e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -679,7 +679,12 @@
ASSERT_TRUE(env_->IsInstanceOf(o, c));
// ...whose fields haven't been initialized because
// we didn't call a constructor.
- ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
+ if (art::mirror::kUseStringCompression) {
+ // Zero-length string is compressed, so the length internally will be -(1 << 31).
+ ASSERT_EQ(-2147483648, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
+ } else {
+ ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
+ }
}
TEST_F(JniInternalTest, GetVersion) {