diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 29 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 19 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 29 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 14 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 14 | ||||
| -rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.cc | 7 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm.cc | 209 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 209 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 228 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 50 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 46 |
11 files changed, 453 insertions, 401 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 08227fc8c8..25d3855e39 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -4864,16 +4864,21 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { + Register length; + if (maybe_compressed_char_at) { + length = locations->GetTemp(0).AsRegister<Register>(); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { - Register length = IP; Label uncompressed_load, done; - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - __ LoadFromOffset(kLoadWord, length, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ cmp(length, ShifterOperand(0)); - __ b(&uncompressed_load, GE); + __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ b(&uncompressed_load, CS); __ LoadFromOffset(kLoadUnsignedByte, out_loc.AsRegister<Register>(), obj, @@ -4908,12 +4913,10 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { Label uncompressed_load, done; - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - Register length = locations->GetTemp(0).AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, length, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ cmp(length, ShifterOperand(0)); - __ b(&uncompressed_load, GE); + __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ b(&uncompressed_load, CS); __ ldrb(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); __ b(&done); @@ -5318,7 +5321,7 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); // Mask out compression flag from String's array length. 
if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ bic(out, out, ShifterOperand(1u << 31)); + __ Lsr(out, out, 1u); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index aef46c8c5d..c54e8e1130 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2333,13 +2333,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); length = temps.AcquireW(); - __ Ldr(length, HeapOperand(obj, count_offset)); + if (instruction->GetArray()->IsIntermediateAddress()) { + DCHECK_LT(count_offset, offset); + int64_t adjusted_offset = static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset); + // Note that `adjusted_offset` is negative, so this will be a LDUR. + __ Ldr(length, MemOperand(obj.X(), adjusted_offset)); + } else { + __ Ldr(length, HeapOperand(obj, count_offset)); + } codegen_->MaybeRecordImplicitNullCheck(instruction); } if (index.IsConstant()) { if (maybe_compressed_char_at) { vixl::aarch64::Label uncompressed_load, done; - __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Tbnz(length.W(), 0, &uncompressed_load); __ Ldrb(Register(OutputCPURegister(instruction)), HeapOperand(obj, offset + Int64ConstantFrom(index))); __ B(&done); @@ -2367,7 +2376,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { vixl::aarch64::Label uncompressed_load, done; - __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Tbnz(length.W(), 0, &uncompressed_load); __ 
Ldrb(Register(OutputCPURegister(instruction)), HeapOperand(temp, XRegisterFrom(index), LSL, 0)); __ B(&done); @@ -2412,7 +2423,7 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) codegen_->MaybeRecordImplicitNullCheck(instruction); // Mask out compression flag from String's array length. if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX))); + __ Lsr(out.W(), out.W(), 1u); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b9814b63e9..e7039e637d 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3798,16 +3798,21 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { + vixl32::Register length; + if (maybe_compressed_char_at) { + length = RegisterFrom(locations->GetTemp(0)); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { - vixl32::Register length = temps.Acquire(); vixl32::Label uncompressed_load, done; - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ Cmp(length, 0); - __ B(ge, &uncompressed_load); + __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ B(cs, &uncompressed_load); GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out_loc), obj, @@ -3835,12 +3840,10 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - vixl32::Register length = RegisterFrom(locations->GetTemp(0)); - GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ Cmp(length, 0); - __ B(ge, &uncompressed_load); + __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ B(cs, &uncompressed_load); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); __ B(&done); __ Bind(&uncompressed_load); @@ -4219,7 +4222,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction codegen_->MaybeRecordImplicitNullCheck(instruction); // Mask out compression flag from String's array length. 
if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ Bic(out, out, 1u << 31); + __ Lsr(out, out, 1u); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4aa5f2c86e..8c6580690b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -151,7 +151,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { } __ movl(length_loc.AsRegister<Register>(), array_len); if (mirror::kUseStringCompression) { - __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX)); + __ shrl(length_loc.AsRegister<Register>(), Immediate(1)); } } x86_codegen->EmitParallelMoves( @@ -5243,9 +5243,11 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // Branch cases into compressed and uncompressed for each index's type. uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); NearLabel done, not_compressed; - __ cmpl(Address(obj, count_offset), Immediate(0)); + __ testl(Address(obj, count_offset), Immediate(1)); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ j(kGreaterEqual, &not_compressed); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ j(kNotZero, &not_compressed); __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); __ jmp(&done); __ Bind(&not_compressed); @@ -5595,7 +5597,7 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); // Mask out most significant bit in case the array is String's array of char. 
if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ andl(out, Immediate(INT32_MAX)); + __ shrl(out, Immediate(1)); } } @@ -5654,10 +5656,12 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<Register>(), len_offset); if (is_string_compressed_char_at) { + // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for + // the string compression flag) with the in-memory length and avoid the temporary. Register length_reg = locations->GetTemp(0).AsRegister<Register>(); __ movl(length_reg, array_len); codegen_->MaybeRecordImplicitNullCheck(array_length); - __ andl(length_reg, Immediate(INT32_MAX)); + __ shrl(length_reg, Immediate(1)); codegen_->GenerateIntCompare(length_reg, index_loc); } else { // Checking bounds for general case: diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 3612c75b78..524cd5b21d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -199,7 +199,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { } __ movl(length_loc.AsRegister<CpuRegister>(), array_len); if (mirror::kUseStringCompression) { - __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX)); + __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1)); } } @@ -4732,9 +4732,11 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // Branch cases into compressed and uncompressed for each index's type. 
uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); NearLabel done, not_compressed; - __ cmpl(Address(obj, count_offset), Immediate(0)); + __ testl(Address(obj, count_offset), Immediate(1)); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ j(kGreaterEqual, &not_compressed); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ j(kNotZero, &not_compressed); __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); __ jmp(&done); __ Bind(&not_compressed); @@ -5066,7 +5068,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) codegen_->MaybeRecordImplicitNullCheck(instruction); // Mask out most significant bit in case the array is String's array of char. if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ andl(out, Immediate(INT32_MAX)); + __ shrl(out, Immediate(1)); } } @@ -5118,10 +5120,12 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for + // the string compression flag) with the in-memory length and avoid the temporary. 
CpuRegister length_reg = CpuRegister(TMP); __ movl(length_reg, array_len); codegen_->MaybeRecordImplicitNullCheck(array_length); - __ andl(length_reg, Immediate(INT32_MAX)); + __ shrl(length_reg, Immediate(1)); codegen_->GenerateIntCompare(length_reg, index_loc); } else { // Checking the bound for general case: diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index d0dd650024..6d107d571f 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -140,13 +140,6 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - // Don't move the array pointer if it is charAt because we need to take the count first. - // TODO: Implement reading (length + compression) for String compression feature from - // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary. - // Note that "LDR (Immediate)" does not have a "signed offset" encoding. - if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { - return; - } if (TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0c39223388..8234b2467d 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1058,7 +1058,6 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { // Need temporary registers for String compression's feature. 
if (mirror::kUseStringCompression) { locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1074,10 +1073,9 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = locations->GetTemp(0).AsRegister<Register>(); Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); - Register temp3, temp4; + Register temp3; if (mirror::kUseStringCompression) { temp3 = locations->GetTemp(3).AsRegister<Register>(); - temp4 = locations->GetTemp(4).AsRegister<Register>(); } Label loop; @@ -1104,41 +1102,42 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ subs(out, str, ShifterOperand(arg)); __ b(&end, EQ); + if (mirror::kUseStringCompression) { - // Load lengths of this and argument strings. + // Load `count` fields of this and argument strings. __ ldr(temp3, Address(str, count_offset)); - __ ldr(temp4, Address(arg, count_offset)); - // Clean out compression flag from lengths. - __ bic(temp0, temp3, ShifterOperand(0x80000000)); - __ bic(IP, temp4, ShifterOperand(0x80000000)); + __ ldr(temp2, Address(arg, count_offset)); + // Extract lengths from the `count` fields. + __ Lsr(temp0, temp3, 1u); + __ Lsr(temp1, temp2, 1u); } else { // Load lengths of this and argument strings. __ ldr(temp0, Address(str, count_offset)); - __ ldr(IP, Address(arg, count_offset)); + __ ldr(temp1, Address(arg, count_offset)); } // out = length diff. - __ subs(out, temp0, ShifterOperand(IP)); + __ subs(out, temp0, ShifterOperand(temp1)); // temp0 = min(len(str), len(arg)). __ it(GT); - __ mov(temp0, ShifterOperand(IP), GT); + __ mov(temp0, ShifterOperand(temp1), GT); // Shorter string is empty? 
__ CompareAndBranchIfZero(temp0, &end); if (mirror::kUseStringCompression) { // Check if both strings using same compression style to use this comparison loop. - __ eors(temp3, temp3, ShifterOperand(temp4)); - __ b(&different_compression, MI); - } - // Store offset of string value in preparation for comparison loop. - __ mov(temp1, ShifterOperand(value_offset)); - if (mirror::kUseStringCompression) { + __ eor(temp2, temp2, ShifterOperand(temp3)); + __ Lsrs(temp2, temp2, 1u); + __ b(&different_compression, CS); // For string compression, calculate the number of bytes to compare (not chars). // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. - __ cmp(temp4, ShifterOperand(0)); - __ it(GE); - __ add(temp0, temp0, ShifterOperand(temp0), GE); + __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag. + __ it(NE); + __ add(temp0, temp0, ShifterOperand(temp0), NE); } + // Store offset of string value in preparation for comparison loop. + __ mov(temp1, ShifterOperand(value_offset)); + // Assertions that must hold in order to compare multiple characters at a time. CHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), @@ -1198,69 +1197,80 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { // The comparison is unsigned for string compression, otherwise signed. __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4)); __ b(&end, mirror::kUseStringCompression ? LS : LE); + // Extract the characters and calculate the difference. - Label uncompressed_string, continue_process; if (mirror::kUseStringCompression) { - __ cmp(temp4, ShifterOperand(0)); - __ b(&uncompressed_string, GE); - __ bic(temp1, temp1, ShifterOperand(0x7)); - __ b(&continue_process); + // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear + // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`. 
+ // The compression flag is now in the highest bit of temp3, so let's play some tricks. + __ orr(temp3, temp3, ShifterOperand(0xffu << 23)); // uncompressed ? 0xff800000u : 0x7ff80000u + __ bic(temp1, temp1, ShifterOperand(temp3, LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u) + __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u. + __ Lsr(temp2, temp2, temp1); // Extract second character. + __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu + __ Lsr(out, IP, temp1); // Extract first character. + __ and_(temp2, temp2, ShifterOperand(temp3)); + __ and_(out, out, ShifterOperand(temp3)); + } else { + __ bic(temp1, temp1, ShifterOperand(0xf)); + __ Lsr(temp2, temp2, temp1); + __ Lsr(out, IP, temp1); + __ movt(temp2, 0); + __ movt(out, 0); } - __ Bind(&uncompressed_string); - __ bic(temp1, temp1, ShifterOperand(0xf)); - __ Bind(&continue_process); - __ Lsr(temp2, temp2, temp1); - __ Lsr(IP, IP, temp1); - Label calculate_difference, uncompressed_string_extract_chars; - if (mirror::kUseStringCompression) { - __ cmp(temp4, ShifterOperand(0)); - __ b(&uncompressed_string_extract_chars, GE); - __ ubfx(temp2, temp2, 0, 8); - __ ubfx(IP, IP, 0, 8); - __ b(&calculate_difference); - } - __ Bind(&uncompressed_string_extract_chars); - __ movt(temp2, 0); - __ movt(IP, 0); - __ Bind(&calculate_difference); - __ sub(out, IP, ShifterOperand(temp2)); - __ b(&end); + __ sub(out, out, ShifterOperand(temp2)); if (mirror::kUseStringCompression) { + __ b(&end); + __ Bind(&different_compression); + + // Comparison for different compression style. const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); - Label loop_arg_compressed, loop_this_compressed, find_diff; - // Comparison for different compression style. - // This part is when THIS is compressed and ARG is not. 
- __ Bind(&different_compression); - __ add(temp2, str, ShifterOperand(value_offset)); - __ add(temp3, arg, ShifterOperand(value_offset)); - __ cmp(temp4, ShifterOperand(0)); - __ b(&loop_arg_compressed, LT); - - __ Bind(&loop_this_compressed); - __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex)); - __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex)); - __ cmp(IP, ShifterOperand(temp4)); - __ b(&find_diff, NE); - __ subs(temp0, temp0, ShifterOperand(1)); - __ b(&loop_this_compressed, GT); - __ b(&end); - // This part is when THIS is not compressed and ARG is. - __ Bind(&loop_arg_compressed); - __ ldrh(IP, Address(temp2, char_size, Address::PostIndex)); - __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex)); - __ cmp(IP, ShifterOperand(temp4)); - __ b(&find_diff, NE); - __ subs(temp0, temp0, ShifterOperand(1)); - __ b(&loop_arg_compressed, GT); + // We want to free up the temp3, currently holding `str.count`, for comparison. + // So, we move it to the bottom bit of the iteration count `temp0` which we then + // need to treat as unsigned. Start by freeing the bit with an ADD and continue + // further down by a LSRS+SBC which will flip the meaning of the flag but allow + // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. + __ add(temp0, temp0, ShifterOperand(temp0)); // Unlike LSL, this ADD is always 16-bit. + // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. + __ mov(temp1, ShifterOperand(str)); + __ mov(temp2, ShifterOperand(arg)); + __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag. + __ it(CS, kItThen); // Interleave with selection of temp1 and temp2. + __ mov(temp1, ShifterOperand(arg), CS); // Preserves flags. + __ mov(temp2, ShifterOperand(str), CS); // Preserves flags. + __ sbc(temp0, temp0, ShifterOperand(0)); // Complete the move of the compression flag. + + // Adjust temp1 and temp2 from string pointers to data pointers. 
+ __ add(temp1, temp1, ShifterOperand(value_offset)); + __ add(temp2, temp2, ShifterOperand(value_offset)); + + Label different_compression_loop; + Label different_compression_diff; + + // Main loop for different compression. + __ Bind(&different_compression_loop); + __ ldrb(IP, Address(temp1, c_char_size, Address::PostIndex)); + __ ldrh(temp3, Address(temp2, char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp3)); + __ b(&different_compression_diff, NE); + __ subs(temp0, temp0, ShifterOperand(2)); + __ b(&different_compression_loop, HI); __ b(&end); // Calculate the difference. - __ Bind(&find_diff); - __ sub(out, IP, ShifterOperand(temp4)); + __ Bind(&different_compression_diff); + __ sub(out, IP, ShifterOperand(temp3)); + // Flip the difference if the `arg` is compressed. + // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag. + __ Lsrs(temp0, temp0, 1u); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ it(CC); + __ rsb(out, out, ShifterOperand(0), CC); } __ Bind(&end); @@ -1298,7 +1308,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); - Label loop, preloop; + Label loop; Label end; Label return_true; Label return_false; @@ -1317,6 +1327,10 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { __ CompareAndBranchIfZero(arg, &return_false); } + // Reference equality check, return true if same reference. + __ cmp(str, ShifterOperand(arg)); + __ b(&return_true, EQ); + if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. // All string objects must have the same type since String cannot be subclassed. 
@@ -1328,48 +1342,44 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { __ b(&return_false, NE); } - // Load lengths of this and argument strings. + // Load `count` fields of this and argument strings. __ ldr(temp, Address(str, count_offset)); __ ldr(temp1, Address(arg, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. __ cmp(temp, ShifterOperand(temp1)); __ b(&return_false, NE); - // Return true if both strings are empty. - if (mirror::kUseStringCompression) { - // Length needs to be masked out first because 0 is treated as compressed. - __ bic(temp, temp, ShifterOperand(0x80000000)); - } + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); __ cbz(temp, &return_true); - // Reference equality check, return true if same reference. - __ cmp(str, ShifterOperand(arg)); - __ b(&return_true, EQ); - // Assertions that must hold in order to compare strings 2 characters at a time. + // Assertions that must hold in order to compare strings 4 bytes at a time. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); if (mirror::kUseStringCompression) { - // If not compressed, directly to fast compare. Else do preprocess on length. - __ cmp(temp1, ShifterOperand(0)); - __ b(&preloop, GT); - // Mask out compression flag and adjust length for compressed string (8-bit) - // as if it is a 16-bit data, new_length = (length + 1) / 2. - __ add(temp, temp, ShifterOperand(1)); - __ Lsr(temp, temp, 1); - __ Bind(&preloop); + // For string compression, calculate the number of bytes to compare (not chars). 
+ // This could in theory exceed INT32_MAX, so treat temp as unsigned. + __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. + __ it(CS); // If uncompressed, + __ add(temp, temp, ShifterOperand(temp), CS); // double the byte count. } - // Loop to compare strings 2 characters at a time starting at the front of the string. - // Ok to do this because strings with an odd length are zero-padded. + + // Store offset of string value in preparation for comparison loop. __ LoadImmediate(temp1, value_offset); + + // Loop to compare strings 4 bytes at a time starting at the front of the string. + // Ok to do this because strings are zero-padded to kObjectAlignment. __ Bind(&loop); __ ldr(out, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); + __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t))); __ cmp(out, ShifterOperand(temp2)); __ b(&return_false, NE); - __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t))); - __ subs(temp, temp, ShifterOperand(sizeof(uint32_t) / sizeof(uint16_t))); - __ b(&loop, GT); + // With string compression, we have compared 4 bytes, otherwise 2 chars. + __ subs(temp, temp, ShifterOperand(mirror::kUseStringCompression ? 4 : 2)); + __ b(&loop, HI); // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -2477,8 +2487,8 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); // String's length. 
__ ldr(IP, Address(srcObj, count_offset)); - __ cmp(IP, ShifterOperand(0)); - __ b(&compressed_string_preloop, LT); + __ tst(IP, ShifterOperand(1)); + __ b(&compressed_string_preloop, EQ); } __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); @@ -2513,9 +2523,10 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ subs(num_chr, num_chr, ShifterOperand(1)); __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); __ b(&remainder, GT); - __ b(&done); if (mirror::kUseStringCompression) { + __ b(&done); + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index b9424a3f20..451abc56d3 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1243,7 +1243,6 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { // Need temporary registers for String compression's feature. if (mirror::kUseStringCompression) { locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1261,10 +1260,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = WRegisterFrom(locations->GetTemp(0)); Register temp1 = WRegisterFrom(locations->GetTemp(1)); Register temp2 = WRegisterFrom(locations->GetTemp(2)); - Register temp3, temp5; + Register temp3; if (mirror::kUseStringCompression) { temp3 = WRegisterFrom(locations->GetTemp(3)); - temp5 = WRegisterFrom(locations->GetTemp(4)); } vixl::aarch64::Label loop; @@ -1291,68 +1289,65 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. 
__ Subs(out, str, arg); __ B(&end, eq); + if (mirror::kUseStringCompression) { - // Load lengths of this and argument strings. + // Load `count` fields of this and argument strings. __ Ldr(temp3, HeapOperand(str, count_offset)); - __ Ldr(temp5, HeapOperand(arg, count_offset)); + __ Ldr(temp2, HeapOperand(arg, count_offset)); // Clean out compression flag from lengths. - __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000))); - __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000))); + __ Lsr(temp0, temp3, 1u); + __ Lsr(temp1, temp2, 1u); } else { // Load lengths of this and argument strings. __ Ldr(temp0, HeapOperand(str, count_offset)); __ Ldr(temp1, HeapOperand(arg, count_offset)); } - // Return zero if both strings are empty. - __ Orr(out, temp0, temp1); - __ Cbz(out, &end); // out = length diff. __ Subs(out, temp0, temp1); - // temp2 = min(len(str), len(arg)). - __ Csel(temp2, temp1, temp0, ge); + // temp0 = min(len(str), len(arg)). + __ Csel(temp0, temp1, temp0, ge); // Shorter string is empty? - __ Cbz(temp2, &end); + __ Cbz(temp0, &end); if (mirror::kUseStringCompression) { // Check if both strings using same compression style to use this comparison loop. - __ Eor(temp3.W(), temp3, Operand(temp5)); - __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression); + __ Eor(temp2, temp2, Operand(temp3)); + // Interleave with compression flag extraction which is needed for both paths + // and also set flags which is needed only for the different compressions path. + __ Ands(temp3.W(), temp3.W(), Operand(1)); + __ Tbnz(temp2, 0, &different_compression); // Does not use flags. } // Store offset of string value in preparation for comparison loop. __ Mov(temp1, value_offset); if (mirror::kUseStringCompression) { // For string compression, calculate the number of bytes to compare (not chars). - // This could be in theory exceed INT32_MAX, so treat temp2 as unsigned. 
- vixl::aarch64::Label let_it_signed; - __ Cmp(temp5, Operand(0)); - __ B(lt, &let_it_signed); - __ Add(temp2, temp2, Operand(temp2)); - __ Bind(&let_it_signed); + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. + __ Lsl(temp0, temp0, temp3); } UseScratchRegisterScope scratch_scope(masm); Register temp4 = scratch_scope.AcquireX(); - // Assertions that must hold in order to compare strings 4 characters at a time. + // Assertions that must hold in order to compare strings 8 bytes at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); - // Promote temp0 to an X reg, ready for LDR. - temp0 = temp0.X(); + // Promote temp2 to an X reg, ready for LDR. + temp2 = temp2.X(); // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). __ Bind(&loop); __ Ldr(temp4, MemOperand(str.X(), temp1.X())); - __ Ldr(temp0, MemOperand(arg.X(), temp1.X())); - __ Cmp(temp4, temp0); + __ Ldr(temp2, MemOperand(arg.X(), temp1.X())); + __ Cmp(temp4, temp2); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); // With string compression, we have compared 8 bytes, otherwise 4 chars. - __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4); - __ B(hi, &loop); + __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4); + __ B(&loop, hi); __ B(&end); // Promote temp1 to an X reg, ready for EOR. @@ -1361,78 +1356,85 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. 
- __ Eor(temp1, temp0, temp4); + __ Eor(temp1, temp2, temp4); __ Rbit(temp1, temp1); __ Clz(temp1, temp1); + // If the number of chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or // unsigned when string compression is disabled. // When it's enabled, the comparison must be unsigned. - __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); + __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); __ B(ls, &end); + // Extract the characters and calculate the difference. - vixl::aarch64::Label uncompressed_string, continue_process; if (mirror:: kUseStringCompression) { - __ Tbz(temp5, kWRegSize - 1, &uncompressed_string); __ Bic(temp1, temp1, 0x7); - __ B(&continue_process); + __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u)); + } else { + __ Bic(temp1, temp1, 0xf); } - __ Bind(&uncompressed_string); - __ Bic(temp1, temp1, 0xf); - __ Bind(&continue_process); - - __ Lsr(temp0, temp0, temp1); + __ Lsr(temp2, temp2, temp1); __ Lsr(temp4, temp4, temp1); - vixl::aarch64::Label uncompressed_string_extract_chars; if (mirror::kUseStringCompression) { - __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars); - __ And(temp4, temp4, 0xff); - __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB)); - __ B(&end); + // Prioritize the case of compressed strings and calculate such result first. + __ Uxtb(temp1, temp4); + __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB)); + __ Tbz(temp3, 0u, &end); // If actually compressed, we're done. 
} - __ Bind(&uncompressed_string_extract_chars); - __ And(temp4, temp4, 0xffff); - __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); - __ B(&end); + __ Uxth(temp4, temp4); + __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH)); if (mirror::kUseStringCompression) { - vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff; + __ B(&end); + __ Bind(&different_compression); + + // Comparison for different compression style. const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); - temp0 = temp0.W(); temp1 = temp1.W(); - // Comparison for different compression style. - // This part is when THIS is compressed and ARG is not. - __ Bind(&different_compression); - __ Add(temp0, str, Operand(value_offset)); - __ Add(temp1, arg, Operand(value_offset)); - __ Cmp(temp5, Operand(0)); - __ B(lt, &loop_arg_compressed); - - __ Bind(&loop_this_compressed); - __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex)); - __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex)); - __ Cmp(temp3, Operand(temp5)); - __ B(ne, &find_diff); - __ Subs(temp2, temp2, 1); - __ B(gt, &loop_this_compressed); - __ B(&end); - - // This part is when THIS is not compressed and ARG is. - __ Bind(&loop_arg_compressed); - __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex)); - __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex)); - __ Cmp(temp3, Operand(temp5)); - __ B(ne, &find_diff); - __ Subs(temp2, temp2, 1); - __ B(gt, &loop_arg_compressed); + temp2 = temp2.W(); + temp4 = temp4.W(); + + // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. + // Note that flags have been set by the `str` compression flag extraction to `temp3` + // before branching to the `different_compression` label. + __ Csel(temp1, str, arg, eq); // Pointer to the compressed string. + __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string. 
+ + // We want to free up the temp3, currently holding `str` compression flag, for comparison. + // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat + // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which + // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. + __ Lsl(temp0, temp0, 1u); + + // Adjust temp1 and temp2 from string pointers to data pointers. + __ Add(temp1, temp1, Operand(value_offset)); + __ Add(temp2, temp2, Operand(value_offset)); + + // Complete the move of the compression flag. + __ Sub(temp0, temp0, Operand(temp3)); + + vixl::aarch64::Label different_compression_loop; + vixl::aarch64::Label different_compression_diff; + + __ Bind(&different_compression_loop); + __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex)); + __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex)); + __ Subs(temp4, temp4, Operand(temp3)); + __ B(&different_compression_diff, ne); + __ Subs(temp0, temp0, 2); + __ B(&different_compression_loop, hi); __ B(&end); // Calculate the difference. - __ Bind(&find_diff); - __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH)); + __ Bind(&different_compression_diff); + __ Tst(temp0, Operand(1)); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Cneg(out, temp4, ne); } __ Bind(&end); @@ -1468,7 +1470,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { Register temp1 = WRegisterFrom(locations->GetTemp(0)); Register temp2 = WRegisterFrom(locations->GetTemp(1)); - vixl::aarch64::Label loop, preloop; + vixl::aarch64::Label loop; vixl::aarch64::Label end; vixl::aarch64::Label return_true; vixl::aarch64::Label return_false; @@ -1502,49 +1504,46 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { __ B(&return_false, ne); } - // Load lengths of this and argument strings. 
+ // Load `count` fields of this and argument strings. __ Ldr(temp, MemOperand(str.X(), count_offset)); __ Ldr(temp1, MemOperand(arg.X(), count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. __ Cmp(temp, temp1); __ B(&return_false, ne); - // Return true if both strings are empty. - if (mirror::kUseStringCompression) { - // Length needs to be masked out first because 0 is treated as compressed. - __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000))); - } + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); __ Cbz(temp, &return_true); - // Assertions that must hold in order to compare strings 4 characters at a time. + // Assertions that must hold in order to compare strings 8 bytes at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); if (mirror::kUseStringCompression) { - // If not compressed, directly to fast compare. Else do preprocess on length. - __ Cmp(temp1, Operand(0)); - __ B(&preloop, gt); - // Mask out compression flag and adjust length for compressed string (8-bit) - // as if it is a 16-bit data, new_length = (length + 1) / 2 - __ Add(temp, temp, 1); - __ Lsr(temp, temp, 1); + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp as unsigned. + __ Lsr(temp, temp, 1u); // Extract length. + __ And(temp1, temp1, Operand(1)); // Extract compression flag. + __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare. } - temp1 = temp1.X(); - temp2 = temp2.X(); - // Loop to compare strings 4 characters at a time starting at the beginning of the string. 
- // Ok to do this because strings are zero-padded to be 8-byte aligned. // Store offset of string value in preparation for comparison loop - __ Bind(&preloop); __ Mov(temp1, value_offset); + + temp1 = temp1.X(); + temp2 = temp2.X(); + // Loop to compare strings 8 bytes at a time starting at the front of the string. + // Ok to do this because strings are zero-padded to kObjectAlignment. __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); __ Add(temp1, temp1, Operand(sizeof(uint64_t))); __ Cmp(out, temp2); __ B(&return_false, ne); - __ Sub(temp, temp, Operand(4), SetFlags); - __ B(&loop, gt); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags); + __ B(&loop, hi); // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -1900,10 +1899,6 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - // Need temporary register for String compression feature. - if (mirror::kUseStringCompression) { - locations->AddTemp(Location::RequiresRegister()); - } } void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1931,10 +1926,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = XRegisterFrom(locations->GetTemp(0)); Register num_chr = XRegisterFrom(locations->GetTemp(1)); Register tmp1 = XRegisterFrom(locations->GetTemp(2)); - Register tmp3; - if (mirror::kUseStringCompression) { - tmp3 = WRegisterFrom(locations->GetTemp(3)); - } UseScratchRegisterScope temps(masm); Register dst_ptr = temps.AcquireX(); @@ -1957,8 +1948,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { // Location of count in string. 
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); // String's length. - __ Ldr(tmp3, MemOperand(srcObj, count_offset)); - __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop); + __ Ldr(tmp2, MemOperand(srcObj, count_offset)); + __ Tbz(tmp2, 0, &compressed_string_preloop); } __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index e5240a2871..e4bef3446c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1120,7 +1120,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // Need temporary registers for String compression's feature. if (mirror::kUseStringCompression) { locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1136,10 +1135,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); - vixl32::Register temp3, temp4; + vixl32::Register temp3; if (mirror::kUseStringCompression) { temp3 = RegisterFrom(locations->GetTemp(3)); - temp4 = RegisterFrom(locations->GetTemp(4)); } vixl32::Label loop; @@ -1167,23 +1165,20 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ Subs(out, str, arg); __ B(eq, &end); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - vixl32::Register temp_reg = temps.Acquire(); - if (mirror::kUseStringCompression) { - // Load lengths of this and argument strings. + // Load `count` fields of this and argument strings. 
__ Ldr(temp3, MemOperand(str, count_offset)); - __ Ldr(temp4, MemOperand(arg, count_offset)); - // Clean out compression flag from lengths. - __ Bic(temp0, temp3, 0x80000000); - __ Bic(temp_reg, temp4, 0x80000000); + __ Ldr(temp2, MemOperand(arg, count_offset)); + // Extract lengths from the `count` fields. + __ Lsr(temp0, temp3, 1u); + __ Lsr(temp1, temp2, 1u); } else { // Load lengths of this and argument strings. __ Ldr(temp0, MemOperand(str, count_offset)); - __ Ldr(temp_reg, MemOperand(arg, count_offset)); + __ Ldr(temp1, MemOperand(arg, count_offset)); } // out = length diff. - __ Subs(out, temp0, temp_reg); + __ Subs(out, temp0, temp1); // temp0 = min(len(str), len(arg)). { @@ -1192,33 +1187,32 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { CodeBufferCheckScope::kMaximumSize); __ it(gt); - __ mov(gt, temp0, temp_reg); + __ mov(gt, temp0, temp1); } - temps.Release(temp_reg); // Shorter string is empty? __ Cbz(temp0, &end); if (mirror::kUseStringCompression) { // Check if both strings using same compression style to use this comparison loop. - __ Eors(temp3, temp3, temp4); - __ B(mi, &different_compression); - } - // Store offset of string value in preparation for comparison loop. - __ Mov(temp1, value_offset); - if (mirror::kUseStringCompression) { + __ Eors(temp2, temp2, temp3); + __ Lsrs(temp2, temp2, 1u); + __ B(cs, &different_compression); // For string compression, calculate the number of bytes to compare (not chars). // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. - __ Cmp(temp4, 0); + __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag. AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), 2 * kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); - __ it(ge); - __ add(ge, temp0, temp0, temp0); + __ it(ne); + __ add(ne, temp0, temp0, temp0); } + // Store offset of string value in preparation for comparison loop. 
+ __ Mov(temp1, value_offset); + // Assertions that must hold in order to compare multiple characters at a time. CHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), @@ -1227,10 +1221,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Label find_char_diff_2nd_cmp; // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). __ Bind(&loop); - temp_reg = temps.Acquire(); + vixl32::Register temp_reg = temps.Acquire(); __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); @@ -1279,72 +1275,92 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); __ B((mirror::kUseStringCompression ? ls : le), &end); + // Extract the characters and calculate the difference. - vixl32::Label uncompressed_string, continue_process; if (mirror::kUseStringCompression) { - __ Cmp(temp4, 0); - __ B(ge, &uncompressed_string); - __ Bic(temp1, temp1, 0x7); - __ B(&continue_process); + // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear + // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`. + // The compression flag is now in the highest bit of temp3, so let's play some tricks. + __ orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u + __ bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u) + __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u. + __ Lsr(temp2, temp2, temp1); // Extract second character. + __ Lsr(temp3, temp3, 16u); // uncompressed ? 
0xffffu : 0xffu + __ Lsr(out, temp_reg, temp1); // Extract first character. + __ and_(temp2, temp2, temp3); + __ and_(out, out, temp3); + } else { + __ bic(temp1, temp1, 0xf); + __ Lsr(temp2, temp2, temp1); + __ Lsr(out, temp_reg, temp1); + __ movt(temp2, 0); + __ movt(out, 0); } - __ Bind(&uncompressed_string); - __ Bic(temp1, temp1, 0xf); - __ Bind(&continue_process); - __ Lsr(temp2, temp2, temp1); - __ Lsr(temp_reg, temp_reg, temp1); - vixl32::Label calculate_difference, uncompressed_string_extract_chars; - if (mirror::kUseStringCompression) { - __ Cmp(temp4, 0); - __ B(ge, &uncompressed_string_extract_chars); - __ Ubfx(temp2, temp2, 0, 8); - __ Ubfx(temp_reg, temp_reg, 0, 8); - __ B(&calculate_difference); - } - __ Bind(&uncompressed_string_extract_chars); - __ Movt(temp2, 0); - __ Movt(temp_reg, 0); - __ Bind(&calculate_difference); - __ Sub(out, temp_reg, temp2); + __ Sub(out, out, temp2); temps.Release(temp_reg); - __ B(&end); if (mirror::kUseStringCompression) { + __ B(&end); + __ Bind(&different_compression); + + // Comparison for different compression style. const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); - vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff; - // Comparison for different compression style. - // This part is when THIS is compressed and ARG is not. - __ Bind(&different_compression); - __ Add(temp2, str, value_offset); - __ Add(temp3, arg, value_offset); - __ Cmp(temp4, 0); - __ B(lt, &loop_arg_compressed); - __ Bind(&loop_this_compressed); - temp_reg = temps.Acquire(); - __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex)); - __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex)); - __ Cmp(temp_reg, temp4); - __ B(ne, &find_diff); - __ Subs(temp0, temp0, 1); - __ B(gt, &loop_this_compressed); - __ B(&end); + // We want to free up the temp3, currently holding `str.count`, for comparison. 
+ // So, we move it to the bottom bit of the iteration count `temp0` which we then + // need to treat as unsigned. Start by freeing the bit with an ADD and continue + // further down by a LSRS+SBC which will flip the meaning of the flag but allow + // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. + __ add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit. + // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. + __ mov(temp1, str); + __ mov(temp2, arg); + __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag. + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cs); // Interleave with selection of temp1 and temp2. + __ mov(cs, temp1, arg); // Preserves flags. + __ mov(cs, temp2, str); // Preserves flags. + } + __ sbc(temp0, temp0, 0); // Complete the move of the compression flag. + + // Adjust temp1 and temp2 from string pointers to data pointers. + __ add(temp1, temp1, value_offset); + __ add(temp2, temp2, value_offset); - // This part is when THIS is not compressed and ARG is. - __ Bind(&loop_arg_compressed); - __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex)); - __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex)); - __ Cmp(temp_reg, temp4); - __ B(ne, &find_diff); - __ Subs(temp0, temp0, 1); - __ B(gt, &loop_arg_compressed); + vixl32::Label different_compression_loop; + vixl32::Label different_compression_diff; + + // Main loop for different compression. + temp_reg = temps.Acquire(); + __ Bind(&different_compression_loop); + __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); + __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); + __ cmp(temp_reg, temp3); + __ B(ne, &different_compression_diff); + __ Subs(temp0, temp0, 2); + __ B(hi, &different_compression_loop); __ B(&end); // Calculate the difference. 
- __ Bind(&find_diff); - __ Sub(out, temp_reg, temp4); + __ Bind(&different_compression_diff); + __ Sub(out, temp_reg, temp3); temps.Release(temp_reg); + // Flip the difference if the `arg` is compressed. + // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag. + __ Lsrs(temp0, temp0, 1u); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cc); + __ rsb(cc, out, out, 0); } __ Bind(&end); @@ -1382,7 +1398,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); - vixl32::Label loop, preloop; + vixl32::Label loop; vixl32::Label end; vixl32::Label return_true; vixl32::Label return_false; @@ -1401,6 +1417,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Cbz(arg, &return_false); } + // Reference equality check, return true if same reference. + __ Cmp(str, arg); + __ B(eq, &return_true); + if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. // All string objects must have the same type since String cannot be subclassed. @@ -1412,48 +1432,47 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ B(ne, &return_false); } - // Load lengths of this and argument strings. + // Load `count` fields of this and argument strings. __ Ldr(temp, MemOperand(str, count_offset)); __ Ldr(temp1, MemOperand(arg, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. 
__ Cmp(temp, temp1); __ B(ne, &return_false); - // Return true if both strings are empty. - if (mirror::kUseStringCompression) { - // Length needs to be masked out first because 0 is treated as compressed. - __ Bic(temp, temp, 0x80000000); - } + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); __ Cbz(temp, &return_true); - // Reference equality check, return true if same reference. - __ Cmp(str, arg); - __ B(eq, &return_true); - // Assertions that must hold in order to compare strings 2 characters at a time. + // Assertions that must hold in order to compare strings 4 bytes at a time. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); if (mirror::kUseStringCompression) { - // If not compressed, directly to fast compare. Else do preprocess on length. - __ Cmp(temp1, 0); - __ B(gt, &preloop); - // Mask out compression flag and adjust length for compressed string (8-bit) - // as if it is a 16-bit data, new_length = (length + 1) / 2. - __ Add(temp, temp, 1); - __ Lsr(temp, temp, 1); - __ Bind(&preloop); + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp as unsigned. + __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cs); // If uncompressed, + __ add(cs, temp, temp, temp); // double the byte count. } - // Loop to compare strings 2 characters at a time starting at the front of the string. - // Ok to do this because strings with an odd length are zero-padded. + + // Store offset of string value in preparation for comparison loop. 
__ Mov(temp1, value_offset); + + // Loop to compare strings 4 bytes at a time starting at the front of the string. + // Ok to do this because strings are zero-padded to kObjectAlignment. __ Bind(&loop); __ Ldr(out, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); + __ Add(temp1, temp1, sizeof(uint32_t)); __ Cmp(out, temp2); __ B(ne, &return_false); - __ Add(temp1, temp1, sizeof(uint32_t)); - __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t)); - __ B(gt, &loop); + // With string compression, we have compared 4 bytes, otherwise 2 chars. + __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); + __ B(hi, &loop); // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -2547,9 +2566,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) temp = temps.Acquire(); // String's length. __ Ldr(temp, MemOperand(srcObj, count_offset)); - __ Cmp(temp, 0); + __ Tst(temp, 1); temps.Release(temp); - __ B(lt, &compressed_string_preloop); + __ B(eq, &compressed_string_preloop); } __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); @@ -2588,9 +2607,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); __ B(gt, &remainder); - __ B(&done); if (mirror::kUseStringCompression) { + __ B(&done); + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index bac98d52ac..06ab46f536 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1408,21 +1408,22 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { // compression style is decided on alloc. 
__ cmpl(ecx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); + // Return true if strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ jecxz(&return_true); if (mirror::kUseStringCompression) { NearLabel string_uncompressed; - // Differ cases into both compressed or both uncompressed. Different compression style - // is cut above. - __ cmpl(ecx, Immediate(0)); - __ j(kGreaterEqual, &string_uncompressed); + // Extract length and differentiate between both compressed or both uncompressed. + // Different compression style is cut above. + __ shrl(ecx, Immediate(1)); + __ j(kCarrySet, &string_uncompressed); // Divide string length by 2, rounding up, and continue as if uncompressed. - // Merge clearing the compression flag (+0x80000000) with +1 for rounding. - __ addl(ecx, Immediate(0x80000001)); + __ addl(ecx, Immediate(1)); __ shrl(ecx, Immediate(1)); __ Bind(&string_uncompressed); } - // Return true if strings are empty. - __ jecxz(&return_true); // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. __ leal(esi, Address(str, value_offset)); __ leal(edi, Address(arg, value_offset)); @@ -1535,21 +1536,24 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Location of count within the String object. int32_t count_offset = mirror::String::CountOffset().Int32Value(); - // Load string length, i.e., the count field of the string. + // Load the count field of the string containing the length and compression flag. __ movl(string_length, Address(string_obj, count_offset)); - if (mirror::kUseStringCompression) { - string_length_flagged = locations->GetTemp(2).AsRegister<Register>(); - __ movl(string_length_flagged, string_length); - // Mask out first bit used as compression flag. - __ andl(string_length, Immediate(INT32_MAX)); - } - // Do a zero-length check. 
+ // Do a zero-length check. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); // TODO: Support jecxz. NearLabel not_found_label; __ testl(string_length, string_length); __ j(kEqual, &not_found_label); + if (mirror::kUseStringCompression) { + string_length_flagged = locations->GetTemp(2).AsRegister<Register>(); + __ movl(string_length_flagged, string_length); + // Extract the length and shift out the least significant bit used as compression flag. + __ shrl(string_length, Immediate(1)); + } + if (start_at_zero) { // Number of chars to scan is the same as the string length. __ movl(counter, string_length); @@ -1570,8 +1574,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (mirror::kUseStringCompression) { NearLabel modify_counter, offset_uncompressed_label; - __ cmpl(string_length_flagged, Immediate(0)); - __ j(kGreaterEqual, &offset_uncompressed_label); + __ testl(string_length_flagged, Immediate(1)); + __ j(kNotZero, &offset_uncompressed_label); // Move to the start of the string: string_obj + value_offset + start_index. __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); __ jmp(&modify_counter); @@ -1593,8 +1597,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (mirror::kUseStringCompression) { NearLabel uncompressed_string_comparison; NearLabel comparison_done; - __ cmpl(string_length_flagged, Immediate(0)); - __ j(kGreater, &uncompressed_string_comparison); + __ testl(string_length_flagged, Immediate(1)); + __ j(kNotZero, &uncompressed_string_comparison); // Check if EAX (search_value) is ASCII. 
__ cmpl(search_value, Immediate(127)); @@ -1787,8 +1791,10 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ cfi().AdjustCFAOffset(stack_adjust); NearLabel copy_loop, copy_uncompressed; - __ cmpl(Address(obj, count_offset), Immediate(0)); - __ j(kGreaterEqual, &copy_uncompressed); + __ testl(Address(obj, count_offset), Immediate(1)); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ j(kNotZero, &copy_uncompressed); // Compute the address of the source string by adding the number of chars from // the source beginning to the value offset of a string. __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 01577f751c..2ea8670100 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1574,20 +1574,23 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { // compression style is decided on alloc. __ cmpl(rcx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ jrcxz(&return_true); if (mirror::kUseStringCompression) { NearLabel string_uncompressed; - // Both string are compressed. - __ cmpl(rcx, Immediate(0)); - __ j(kGreaterEqual, &string_uncompressed); + // Extract length and differentiate between both compressed or both uncompressed. + // Different compression style is cut above. + __ shrl(rcx, Immediate(1)); + __ j(kCarrySet, &string_uncompressed); // Divide string length by 2, rounding up, and continue as if uncompressed. // Merge clearing the compression flag with +1 for rounding. 
- __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001))); + __ addl(rcx, Immediate(1)); __ shrl(rcx, Immediate(1)); __ Bind(&string_uncompressed); } - // Return true if both strings are empty. - __ jrcxz(&return_true); // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction. __ leal(rsi, Address(str, value_offset)); __ leal(rdi, Address(arg, value_offset)); @@ -1694,21 +1697,22 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Location of count within the String object. int32_t count_offset = mirror::String::CountOffset().Int32Value(); - // Load string length, i.e., the count field of the string. + // Load the count field of the string containing the length and compression flag. __ movl(string_length, Address(string_obj, count_offset)); - if (mirror::kUseStringCompression) { - // Use TMP to keep string_length_flagged. - __ movl(CpuRegister(TMP), string_length); - // Mask out first bit used as compression flag. - __ andl(string_length, Immediate(INT32_MAX)); - } - // Do a length check. + // Do a zero-length check. Even with string compression `count == 0` means empty. // TODO: Support jecxz. NearLabel not_found_label; __ testl(string_length, string_length); __ j(kEqual, &not_found_label); + if (mirror::kUseStringCompression) { + // Use TMP to keep string_length_flagged. + __ movl(CpuRegister(TMP), string_length); + // Mask out first bit used as compression flag. + __ shrl(string_length, Immediate(1)); + } + if (start_at_zero) { // Number of chars to scan is the same as the string length. 
__ movl(counter, string_length); @@ -1728,8 +1732,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (mirror::kUseStringCompression) { NearLabel modify_counter, offset_uncompressed_label; - __ cmpl(CpuRegister(TMP), Immediate(0)); - __ j(kGreaterEqual, &offset_uncompressed_label); + __ testl(CpuRegister(TMP), Immediate(1)); + __ j(kNotZero, &offset_uncompressed_label); __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); __ jmp(&modify_counter); // Move to the start of the string: string_obj + value_offset + 2 * start_index. @@ -1747,8 +1751,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (mirror::kUseStringCompression) { NearLabel uncompressed_string_comparison; NearLabel comparison_done; - __ cmpl(CpuRegister(TMP), Immediate(0)); - __ j(kGreater, &uncompressed_string_comparison); + __ testl(CpuRegister(TMP), Immediate(1)); + __ j(kNotZero, &uncompressed_string_comparison); // Check if RAX (search_value) is ASCII. __ cmpl(search_value, Immediate(127)); __ j(kGreater, &not_found_label); @@ -1931,8 +1935,10 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { // Location of count in string. const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - __ cmpl(Address(obj, count_offset), Immediate(0)); - __ j(kGreaterEqual, &copy_uncompressed); + __ testl(Address(obj, count_offset), Immediate(1)); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ j(kNotZero, &copy_uncompressed); // Compute the address of the source string by adding the number of chars from // the source beginning to the value offset of a string. __ leaq(CpuRegister(RSI), |