summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
author jessicahandojo <jessicahandojo@google.com> 2016-09-09 19:01:32 -0700
committer jessicahandojo <jessicahandojo@google.com> 2016-09-30 10:51:50 -0700
commit0576575d075e97a227010b4adf74ad5c8a920bde (patch)
treec31eb030933e74d6d103979bcf0d1226e769eb5b /compiler/optimizing
parent9e5739aaa690a8529c104f4c05035a657616c310 (diff)
String Compression for ARM and ARM64
Changes to the intrinsics and code generation on ARM and ARM64 for the string compression feature. Currently the feature is off. The sizes of boot.oat and boot.art for ARM before and after the changes (feature OFF) remain unchanged. When the feature is ON, boot.oat increased by 0.60% and boot.art decreased by 9.38%. Likewise for ARM64, the sizes of boot.oat and boot.art before and after the changes (feature OFF) remain unchanged. When the feature is ON, boot.oat increased by 0.48% and boot.art decreased by 6.58%. To turn the feature on: runtime/mirror/string.h (kUseStringCompression = true) and runtime/asm_support.h (STRING_COMPRESSION_FEATURE 1). Test: m -j31 test-art-target All tests passed both when mirror::kUseStringCompression is ON and OFF. Bug: 31040547 Change-Id: I24e86b99391df33ba27df747779b648c5a820649
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/code_generator_arm.cc58
-rw-r--r--compiler/optimizing/code_generator_arm64.cc55
-rw-r--r--compiler/optimizing/instruction_simplifier_arm.cc8
-rw-r--r--compiler/optimizing/instruction_simplifier_arm64.cc7
-rw-r--r--compiler/optimizing/intrinsics_arm.cc210
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc182
6 files changed, 448 insertions, 72 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e343657f29..9870876879 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4633,7 +4633,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
}
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // Also need for String compression feature.
+ if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
+ || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4646,6 +4648,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location out_loc = locations->Out();
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
// The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
@@ -4659,10 +4663,31 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimInt: {
if (index.IsConstant()) {
int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
- uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+ if (maybe_compressed_char_at) {
+ Register length = IP;
+ Label uncompressed_load, done;
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ cmp(length, ShifterOperand(0));
+ __ b(&uncompressed_load, GE);
+ __ LoadFromOffset(kLoadUnsignedByte,
+ out_loc.AsRegister<Register>(),
+ obj,
+ data_offset + const_index);
+ __ b(&done);
+ __ Bind(&uncompressed_load);
+ __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+ out_loc.AsRegister<Register>(),
+ obj,
+ data_offset + (const_index << 1));
+ __ Bind(&done);
+ } else {
+ uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
- LoadOperandType load_type = GetLoadOperandType(type);
- __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+ LoadOperandType load_type = GetLoadOperandType(type);
+ __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+ }
} else {
Register temp = IP;
@@ -4678,7 +4703,24 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
} else {
__ add(temp, obj, ShifterOperand(data_offset));
}
- codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+ if (maybe_compressed_char_at) {
+ Label uncompressed_load, done;
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Register length = locations->GetTemp(0).AsRegister<Register>();
+ __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ cmp(length, ShifterOperand(0));
+ __ b(&uncompressed_load, GE);
+ __ ldrb(out_loc.AsRegister<Register>(),
+ Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
+ __ b(&done);
+ __ Bind(&uncompressed_load);
+ __ ldrh(out_loc.AsRegister<Register>(),
+ Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
+ __ Bind(&done);
+ } else {
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+ }
}
break;
}
@@ -4778,7 +4820,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot) {
// Potential implicit null checks, in the case of reference
// arrays, are handled in the previous switch statement.
- } else {
+ } else if (!maybe_compressed_char_at) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -5068,6 +5110,10 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
Register out = locations->Out().AsRegister<Register>();
__ LoadFromOffset(kLoadWord, out, obj, offset);
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ bic(out, out, ShifterOperand(1u << 31));
+ }
}
void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5d002674d8..969d653f97 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2101,7 +2101,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
Location index = locations->InAt(1);
Location out = locations->Out();
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
-
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
// Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
@@ -2119,9 +2120,28 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
} else {
// General case.
MemOperand source = HeapOperand(obj);
+ Register length;
+ if (maybe_compressed_char_at) {
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ length = temps.AcquireW();
+ __ Ldr(length, HeapOperand(obj, count_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
- source = HeapOperand(obj, offset);
+ if (maybe_compressed_char_at) {
+ vixl::aarch64::Label uncompressed_load, done;
+ __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+ __ Ldrb(Register(OutputCPURegister(instruction)),
+ HeapOperand(obj, offset + Int64ConstantFrom(index)));
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ Ldrh(Register(OutputCPURegister(instruction)),
+ HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
+ __ Bind(&done);
+ } else {
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ source = HeapOperand(obj, offset);
+ }
} else {
Register temp = temps.AcquireSameSizeAs(obj);
if (instruction->GetArray()->IsIntermediateAddress()) {
@@ -2139,11 +2159,24 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
} else {
__ Add(temp, obj, offset);
}
- source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+ if (maybe_compressed_char_at) {
+ vixl::aarch64::Label uncompressed_load, done;
+ __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+ __ Ldrb(Register(OutputCPURegister(instruction)),
+ HeapOperand(temp, XRegisterFrom(index), LSL, 0));
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ Ldrh(Register(OutputCPURegister(instruction)),
+ HeapOperand(temp, XRegisterFrom(index), LSL, 1));
+ __ Bind(&done);
+ } else {
+ source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+ }
+ }
+ if (!maybe_compressed_char_at) {
+ codegen_->Load(type, OutputCPURegister(instruction), source);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
-
- codegen_->Load(type, OutputCPURegister(instruction), source);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
static_assert(
@@ -2167,9 +2200,14 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+ vixl::aarch64::Register out = OutputRegister(instruction);
BlockPoolsScope block_pools(GetVIXLAssembler());
- __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
+ __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+ }
}
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
@@ -2361,7 +2399,6 @@ void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction)
BoundsCheckSlowPathARM64* slow_path =
new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
-
__ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
__ B(slow_path->GetEntryLabel(), hs);
}
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 495f3fd232..56e4c7a9c2 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -44,6 +44,14 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ // TODO: Implement reading (length + compression) for String compression feature from
+ // negative offset (count_offset - data_offset). Thumb2Assembler does not support T4
+ // encoding of "LDR (immediate)" at the moment.
+ // Don't move array pointer if it is charAt because we need to take the count first.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ return;
+ }
+
if (type == Primitive::kPrimLong
|| type == Primitive::kPrimFloat
|| type == Primitive::kPrimDouble) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6d107d571f..d0dd650024 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,6 +140,13 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ // Don't move the array pointer if it is charAt because we need to take the count first.
+ // TODO: Implement reading (length + compression) for String compression feature from
+ // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary.
+ // Note that "LDR (Immediate)" does not have a "signed offset" encoding.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ return;
+ }
if (TryExtractArrayAccessAddress(instruction,
instruction->GetArray(),
instruction->GetIndex(),
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index fd2da1004b..96a6ecbee9 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1039,6 +1039,11 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need temporary registers for String compression's feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
@@ -1053,10 +1058,16 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
Register temp0 = locations->GetTemp(0).AsRegister<Register>();
Register temp1 = locations->GetTemp(1).AsRegister<Register>();
Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+ Register temp3, temp4;
+ if (mirror::kUseStringCompression) {
+ temp3 = locations->GetTemp(3).AsRegister<Register>();
+ temp4 = locations->GetTemp(4).AsRegister<Register>();
+ }
Label loop;
Label find_char_diff;
Label end;
+ Label different_compression;
// Get offsets of count and value fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1077,20 +1088,40 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
// Reference equality check, return 0 if same reference.
__ subs(out, str, ShifterOperand(arg));
__ b(&end, EQ);
- // Load lengths of this and argument strings.
- __ ldr(temp2, Address(str, count_offset));
- __ ldr(temp1, Address(arg, count_offset));
+ if (mirror::kUseStringCompression) {
+ // Load lengths of this and argument strings.
+ __ ldr(temp3, Address(str, count_offset));
+ __ ldr(temp4, Address(arg, count_offset));
+ // Clean out compression flag from lengths.
+ __ bic(temp0, temp3, ShifterOperand(0x80000000));
+ __ bic(IP, temp4, ShifterOperand(0x80000000));
+ } else {
+ // Load lengths of this and argument strings.
+ __ ldr(temp0, Address(str, count_offset));
+ __ ldr(IP, Address(arg, count_offset));
+ }
// out = length diff.
- __ subs(out, temp2, ShifterOperand(temp1));
+ __ subs(out, temp0, ShifterOperand(IP));
// temp0 = min(len(str), len(arg)).
- __ it(Condition::LT, kItElse);
- __ mov(temp0, ShifterOperand(temp2), Condition::LT);
- __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+ __ it(GT);
+ __ mov(temp0, ShifterOperand(IP), GT);
// Shorter string is empty?
__ CompareAndBranchIfZero(temp0, &end);
+ if (mirror::kUseStringCompression) {
+ // Check if both strings using same compression style to use this comparison loop.
+ __ eors(temp3, temp3, ShifterOperand(temp4));
+ __ b(&different_compression, MI);
+ }
// Store offset of string value in preparation for comparison loop.
__ mov(temp1, ShifterOperand(value_offset));
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+ __ cmp(temp4, ShifterOperand(0));
+ __ it(GE);
+ __ add(temp0, temp0, ShifterOperand(temp0), GE);
+ }
// Assertions that must hold in order to compare multiple characters at a time.
CHECK_ALIGNED(value_offset, 8);
@@ -1100,6 +1131,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
DCHECK_EQ(char_size, 2u);
+ Label find_char_diff_2nd_cmp;
// Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
__ Bind(&loop);
__ ldr(IP, Address(str, temp1));
@@ -1107,43 +1139,113 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
__ cmp(IP, ShifterOperand(temp2));
__ b(&find_char_diff, NE);
__ add(temp1, temp1, ShifterOperand(char_size * 2));
- __ sub(temp0, temp0, ShifterOperand(2));
__ ldr(IP, Address(str, temp1));
__ ldr(temp2, Address(arg, temp1));
__ cmp(IP, ShifterOperand(temp2));
- __ b(&find_char_diff, NE);
+ __ b(&find_char_diff_2nd_cmp, NE);
__ add(temp1, temp1, ShifterOperand(char_size * 2));
- __ subs(temp0, temp0, ShifterOperand(2));
-
- __ b(&loop, GT);
+ // With string compression, we have compared 8 bytes, otherwise 4 chars.
+ __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
+ __ b(&loop, HI);
__ b(&end);
- // Find the single 16-bit character difference.
+ __ Bind(&find_char_diff_2nd_cmp);
+ if (mirror::kUseStringCompression) {
+ __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared.
+ __ b(&end, LS); // Was the second comparison fully beyond the end?
+ } else {
+ // Without string compression, we can start treating temp0 as signed
+ // and rely on the signed comparison below.
+ __ sub(temp0, temp0, ShifterOperand(2));
+ }
+
+ // Find the single character difference.
__ Bind(&find_char_diff);
// Get the bit position of the first character that differs.
__ eor(temp1, temp2, ShifterOperand(IP));
__ rbit(temp1, temp1);
__ clz(temp1, temp1);
- // temp0 = number of 16-bit characters remaining to compare.
- // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
- // after the end of the shorter string data).
-
- // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
- // interval [0,1] (0 for low half-word different, 1 for high half-word different).
-
- // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
- // return length diff (out).
- __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
- __ b(&end, LE);
+ // temp0 = number of characters remaining to compare.
+ // (Without string compression, it could be < 1 if a difference is found by the second CMP
+ // in the comparison loop, and after the end of the shorter string data).
+
+ // Without string compression (temp1 >> 4) = character where difference occurs between the last
+ // two words compared, in the interval [0,1].
+ // (0 for low half-word different, 1 for high half-word different).
+ // With string compression, (temp1 << 3) = byte where the difference occurs,
+ // in the interval [0,3].
+
+ // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+ // the remaining string data, so just return length diff (out).
+ // The comparison is unsigned for string compression, otherwise signed.
+ __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
+ __ b(&end, mirror::kUseStringCompression ? LS : LE);
// Extract the characters and calculate the difference.
+ Label uncompressed_string, continue_process;
+ if (mirror::kUseStringCompression) {
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&uncompressed_string, GE);
+ __ bic(temp1, temp1, ShifterOperand(0x7));
+ __ b(&continue_process);
+ }
+ __ Bind(&uncompressed_string);
__ bic(temp1, temp1, ShifterOperand(0xf));
+ __ Bind(&continue_process);
+
__ Lsr(temp2, temp2, temp1);
__ Lsr(IP, IP, temp1);
+ Label calculate_difference, uncompressed_string_extract_chars;
+ if (mirror::kUseStringCompression) {
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&uncompressed_string_extract_chars, GE);
+ __ ubfx(temp2, temp2, 0, 8);
+ __ ubfx(IP, IP, 0, 8);
+ __ b(&calculate_difference);
+ }
+ __ Bind(&uncompressed_string_extract_chars);
__ movt(temp2, 0);
__ movt(IP, 0);
+ __ Bind(&calculate_difference);
__ sub(out, IP, ShifterOperand(temp2));
+ __ b(&end);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ Label loop_arg_compressed, loop_this_compressed, find_diff;
+ // Comparison for different compression style.
+ // This part is when THIS is compressed and ARG is not.
+ __ Bind(&different_compression);
+ __ add(temp2, str, ShifterOperand(value_offset));
+ __ add(temp3, arg, ShifterOperand(value_offset));
+ __ cmp(temp4, ShifterOperand(0));
+ __ b(&loop_arg_compressed, LT);
+
+ __ Bind(&loop_this_compressed);
+ __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
+ __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
+ __ cmp(IP, ShifterOperand(temp4));
+ __ b(&find_diff, NE);
+ __ subs(temp0, temp0, ShifterOperand(1));
+ __ b(&loop_this_compressed, GT);
+ __ b(&end);
+
+ // This part is when THIS is not compressed and ARG is.
+ __ Bind(&loop_arg_compressed);
+ __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
+ __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
+ __ cmp(IP, ShifterOperand(temp4));
+ __ b(&find_diff, NE);
+ __ subs(temp0, temp0, ShifterOperand(1));
+ __ b(&loop_arg_compressed, GT);
+ __ b(&end);
+
+ // Calculate the difference.
+ __ Bind(&find_diff);
+ __ sub(out, IP, ShifterOperand(temp4));
+ }
__ Bind(&end);
@@ -1180,7 +1282,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
Register temp1 = locations->GetTemp(1).AsRegister<Register>();
Register temp2 = locations->GetTemp(2).AsRegister<Register>();
- Label loop;
+ Label loop, preloop;
Label end;
Label return_true;
Label return_false;
@@ -1214,11 +1316,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
__ ldr(temp, Address(str, count_offset));
__ ldr(temp1, Address(arg, count_offset));
// Check if lengths are equal, return false if they're not.
+ // Also compares the compression style, if differs return false.
__ cmp(temp, ShifterOperand(temp1));
__ b(&return_false, NE);
// Return true if both strings are empty.
+ if (mirror::kUseStringCompression) {
+ // Length needs to be masked out first because 0 is treated as compressed.
+ __ bic(temp, temp, ShifterOperand(0x80000000));
+ }
__ cbz(temp, &return_true);
-
// Reference equality check, return true if same reference.
__ cmp(str, ShifterOperand(arg));
__ b(&return_true, EQ);
@@ -1227,10 +1333,19 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
- __ LoadImmediate(temp1, value_offset);
-
+ if (mirror::kUseStringCompression) {
+ // If not compressed, directly to fast compare. Else do preprocess on length.
+ __ cmp(temp1, ShifterOperand(0));
+ __ b(&preloop, GT);
+ // Mask out compression flag and adjust length for compressed string (8-bit)
+ // as if it is a 16-bit data, new_length = (length + 1) / 2.
+ __ add(temp, temp, ShifterOperand(1));
+ __ Lsr(temp, temp, 1);
+ __ Bind(&preloop);
+ }
// Loop to compare strings 2 characters at a time starting at the front of the string.
// Ok to do this because strings with an odd length are zero-padded.
+ __ LoadImmediate(temp1, value_offset);
__ Bind(&loop);
__ ldr(out, Address(str, temp1));
__ ldr(temp2, Address(arg, temp1));
@@ -2330,22 +2445,31 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
- // src range to copy.
- __ add(src_ptr, srcObj, ShifterOperand(value_offset));
- __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
-
+ Label done, compressed_string_loop;
// dst to be copied.
__ add(dst_ptr, dstObj, ShifterOperand(data_offset));
__ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
__ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
-
- // Do the copy.
- Label loop, remainder, done;
-
// Early out for valid zero-length retrievals.
__ b(&done, EQ);
+ // src range to copy.
+ __ add(src_ptr, srcObj, ShifterOperand(value_offset));
+ Label compressed_string_preloop;
+ if (mirror::kUseStringCompression) {
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // String's length.
+ __ ldr(IP, Address(srcObj, count_offset));
+ __ cmp(IP, ShifterOperand(0));
+ __ b(&compressed_string_preloop, LT);
+ }
+ __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
+
+ // Do the copy.
+ Label loop, remainder;
+
// Save repairing the value of num_chr on the < 4 character path.
__ subs(IP, num_chr, ShifterOperand(4));
__ b(&remainder, LT);
@@ -2374,6 +2498,20 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ subs(num_chr, num_chr, ShifterOperand(1));
__ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
__ b(&remainder, GT);
+ __ b(&done);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
+ __ Bind(&compressed_string_preloop);
+ __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
+ __ Bind(&compressed_string_loop);
+ __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
+ __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+ __ subs(num_chr, num_chr, ShifterOperand(1));
+ __ b(&compressed_string_loop, GT);
+ }
__ Bind(&done);
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ce58657bcd..e2c1802fdc 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1223,6 +1223,11 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need temporary registers for String compression's feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
@@ -1239,10 +1244,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
Register temp0 = WRegisterFrom(locations->GetTemp(0));
Register temp1 = WRegisterFrom(locations->GetTemp(1));
Register temp2 = WRegisterFrom(locations->GetTemp(2));
+ Register temp3, temp5;
+ if (mirror::kUseStringCompression) {
+ temp3 = WRegisterFrom(locations->GetTemp(3));
+ temp5 = WRegisterFrom(locations->GetTemp(4));
+ }
vixl::aarch64::Label loop;
vixl::aarch64::Label find_char_diff;
vixl::aarch64::Label end;
+ vixl::aarch64::Label different_compression;
// Get offsets of count and value fields within a string object.
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1263,9 +1274,18 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Reference equality check, return 0 if same reference.
__ Subs(out, str, arg);
__ B(&end, eq);
- // Load lengths of this and argument strings.
- __ Ldr(temp0, HeapOperand(str, count_offset));
- __ Ldr(temp1, HeapOperand(arg, count_offset));
+ if (mirror::kUseStringCompression) {
+ // Load lengths of this and argument strings.
+ __ Ldr(temp3, HeapOperand(str, count_offset));
+ __ Ldr(temp5, HeapOperand(arg, count_offset));
+ // Clean out compression flag from lengths.
+ __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000)));
+ __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000)));
+ } else {
+ // Load lengths of this and argument strings.
+ __ Ldr(temp0, HeapOperand(str, count_offset));
+ __ Ldr(temp1, HeapOperand(arg, count_offset));
+ }
// Return zero if both strings are empty.
__ Orr(out, temp0, temp1);
__ Cbz(out, &end);
@@ -1276,8 +1296,22 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Shorter string is empty?
__ Cbz(temp2, &end);
+ if (mirror::kUseStringCompression) {
+ // Check if both strings using same compression style to use this comparison loop.
+ __ Eor(temp3.W(), temp3, Operand(temp5));
+ __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression);
+ }
// Store offset of string value in preparation for comparison loop.
__ Mov(temp1, value_offset);
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could be in theory exceed INT32_MAX, so treat temp2 as unsigned.
+ vixl::aarch64::Label let_it_signed;
+ __ Cmp(temp5, Operand(0));
+ __ B(lt, &let_it_signed);
+ __ Add(temp2, temp2, Operand(temp2));
+ __ Bind(&let_it_signed);
+ }
UseScratchRegisterScope scratch_scope(masm);
Register temp4 = scratch_scope.AcquireX();
@@ -1299,29 +1333,90 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Cmp(temp4, temp0);
__ B(ne, &find_char_diff);
__ Add(temp1, temp1, char_size * 4);
- __ Subs(temp2, temp2, 4);
- __ B(gt, &loop);
+ // With string compression, we have compared 8 bytes, otherwise 4 chars.
+ __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4);
+ __ B(hi, &loop);
__ B(&end);
// Promote temp1 to an X reg, ready for EOR.
temp1 = temp1.X();
- // Find the single 16-bit character difference.
+ // Find the single character difference.
__ Bind(&find_char_diff);
// Get the bit position of the first character that differs.
__ Eor(temp1, temp0, temp4);
__ Rbit(temp1, temp1);
__ Clz(temp1, temp1);
- // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+ // If the number of chars remaining <= the index where the difference occurs (0-3), then
// the difference occurs outside the remaining string data, so just return length diff (out).
- __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
- __ B(le, &end);
+ // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
+ // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
+ // unsigned when string compression is disabled.
+ // When it's enabled, the comparison must be unsigned.
+ __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
+ __ B(ls, &end);
// Extract the characters and calculate the difference.
+ vixl::aarch64::Label uncompressed_string, continue_process;
+ if (mirror:: kUseStringCompression) {
+ __ Tbz(temp5, kWRegSize - 1, &uncompressed_string);
+ __ Bic(temp1, temp1, 0x7);
+ __ B(&continue_process);
+ }
+ __ Bind(&uncompressed_string);
__ Bic(temp1, temp1, 0xf);
+ __ Bind(&continue_process);
+
__ Lsr(temp0, temp0, temp1);
__ Lsr(temp4, temp4, temp1);
+ vixl::aarch64::Label uncompressed_string_extract_chars;
+ if (mirror::kUseStringCompression) {
+ __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars);
+ __ And(temp4, temp4, 0xff);
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB));
+ __ B(&end);
+ }
+ __ Bind(&uncompressed_string_extract_chars);
__ And(temp4, temp4, 0xffff);
__ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
+ __ B(&end);
+
+ if (mirror::kUseStringCompression) {
+ vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff;
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ temp0 = temp0.W();
+ temp1 = temp1.W();
+ // Comparison for different compression style.
+ // This part is when THIS is compressed and ARG is not.
+ __ Bind(&different_compression);
+ __ Add(temp0, str, Operand(value_offset));
+ __ Add(temp1, arg, Operand(value_offset));
+ __ Cmp(temp5, Operand(0));
+ __ B(lt, &loop_arg_compressed);
+
+ __ Bind(&loop_this_compressed);
+ __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex));
+ __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex));
+ __ Cmp(temp3, Operand(temp5));
+ __ B(ne, &find_diff);
+ __ Subs(temp2, temp2, 1);
+ __ B(gt, &loop_this_compressed);
+ __ B(&end);
+
+ // This part is when THIS is not compressed and ARG is.
+ __ Bind(&loop_arg_compressed);
+ __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex));
+ __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex));
+ __ Cmp(temp3, Operand(temp5));
+ __ B(ne, &find_diff);
+ __ Subs(temp2, temp2, 1);
+ __ B(gt, &loop_arg_compressed);
+ __ B(&end);
+
+ // Calculate the difference.
+ __ Bind(&find_diff);
+ __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH));
+ }
__ Bind(&end);
@@ -1356,7 +1451,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
Register temp1 = WRegisterFrom(locations->GetTemp(0));
Register temp2 = WRegisterFrom(locations->GetTemp(1));
- vixl::aarch64::Label loop;
+ vixl::aarch64::Label loop, preloop;
vixl::aarch64::Label end;
vixl::aarch64::Label return_true;
vixl::aarch64::Label return_false;
@@ -1394,22 +1489,37 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
__ Ldr(temp, MemOperand(str.X(), count_offset));
__ Ldr(temp1, MemOperand(arg.X(), count_offset));
// Check if lengths are equal, return false if they're not.
+ // Also compares the compression style, if differs return false.
__ Cmp(temp, temp1);
__ B(&return_false, ne);
- // Store offset of string value in preparation for comparison loop
- __ Mov(temp1, value_offset);
// Return true if both strings are empty.
+ if (mirror::kUseStringCompression) {
+ // Length needs to be masked out first because 0 is treated as compressed.
+ __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000)));
+ }
__ Cbz(temp, &return_true);
// Assertions that must hold in order to compare strings 4 characters at a time.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+ if (mirror::kUseStringCompression) {
+ // If not compressed, directly to fast compare. Else do preprocess on length.
+ __ Cmp(temp1, Operand(0));
+ __ B(&preloop, gt);
+ // Mask out compression flag and adjust length for compressed string (8-bit)
+ // as if it is a 16-bit data, new_length = (length + 1) / 2
+ __ Add(temp, temp, 1);
+ __ Lsr(temp, temp, 1);
+ }
+
temp1 = temp1.X();
temp2 = temp2.X();
-
// Loop to compare strings 4 characters at a time starting at the beginning of the string.
// Ok to do this because strings are zero-padded to be 8-byte aligned.
+ // Store offset of string value in preparation for comparison loop
+ __ Bind(&preloop);
+ __ Mov(temp1, value_offset);
__ Bind(&loop);
__ Ldr(out, MemOperand(str.X(), temp1));
__ Ldr(temp2, MemOperand(arg.X(), temp1));
@@ -1773,6 +1883,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke)
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ // Need temporary register for String compression feature.
+ if (mirror::kUseStringCompression) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1800,29 +1914,41 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register src_ptr = XRegisterFrom(locations->GetTemp(0));
Register num_chr = XRegisterFrom(locations->GetTemp(1));
Register tmp1 = XRegisterFrom(locations->GetTemp(2));
+ Register tmp3;
+ if (mirror::kUseStringCompression) {
+ tmp3 = WRegisterFrom(locations->GetTemp(3));
+ }
UseScratchRegisterScope temps(masm);
Register dst_ptr = temps.AcquireX();
Register tmp2 = temps.AcquireX();
- // src address to copy from.
- __ Add(src_ptr, srcObj, Operand(value_offset));
- __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
+ vixl::aarch64::Label done;
+ vixl::aarch64::Label compressed_string_loop;
+ __ Sub(num_chr, srcEnd, srcBegin);
+ // Early out for valid zero-length retrievals.
+ __ Cbz(num_chr, &done);
// dst address start to copy to.
__ Add(dst_ptr, dstObj, Operand(data_offset));
__ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
- __ Sub(num_chr, srcEnd, srcBegin);
+ // src address to copy from.
+ __ Add(src_ptr, srcObj, Operand(value_offset));
+ vixl::aarch64::Label compressed_string_preloop;
+ if (mirror::kUseStringCompression) {
+ // Location of count in string.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // String's length.
+ __ Ldr(tmp3, MemOperand(srcObj, count_offset));
+ __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop);
+ }
+ __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
// Do the copy.
vixl::aarch64::Label loop;
- vixl::aarch64::Label done;
vixl::aarch64::Label remainder;
- // Early out for valid zero-length retrievals.
- __ Cbz(num_chr, &done);
-
// Save repairing the value of num_chr on the < 8 character path.
__ Subs(tmp1, num_chr, 8);
__ B(lt, &remainder);
@@ -1848,6 +1974,20 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Subs(num_chr, num_chr, 1);
__ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
__ B(gt, &remainder);
+ __ B(&done);
+
+ if (mirror::kUseStringCompression) {
+ const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+ DCHECK_EQ(c_char_size, 1u);
+ __ Bind(&compressed_string_preloop);
+ __ Add(src_ptr, src_ptr, Operand(srcBegin));
+ // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
+ __ Bind(&compressed_string_loop);
+ __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
+ __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
+ __ Subs(num_chr, num_chr, Operand(1));
+ __ B(gt, &compressed_string_loop);
+ }
__ Bind(&done);
}