diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 58 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 55 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm.cc | 210 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 182 |
6 files changed, 448 insertions, 72 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e343657f29..9870876879 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -4633,7 +4633,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { } // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // Also need for String compression feature. + if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) + || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { locations->AddTemp(Location::RequiresRegister()); } } @@ -4646,6 +4648,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
@@ -4659,10 +4663,31 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: { if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); - uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + Register length = IP; + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ b(&uncompressed_load, GE); + __ LoadFromOffset(kLoadUnsignedByte, + out_loc.AsRegister<Register>(), + obj, + data_offset + const_index); + __ b(&done); + __ Bind(&uncompressed_load); + __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), + out_loc.AsRegister<Register>(), + obj, + data_offset + (const_index << 1)); + __ Bind(&done); + } else { + uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); - LoadOperandType load_type = GetLoadOperandType(type); - __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + LoadOperandType load_type = GetLoadOperandType(type); + __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + } } else { Register temp = IP; @@ -4678,7 +4703,24 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } else { __ add(temp, obj, ShifterOperand(data_offset)); } - codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + if (maybe_compressed_char_at) { + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Register length = locations->GetTemp(0).AsRegister<Register>(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ 
b(&uncompressed_load, GE); + __ ldrb(out_loc.AsRegister<Register>(), + Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); + __ b(&done); + __ Bind(&uncompressed_load); + __ ldrh(out_loc.AsRegister<Register>(), + Address(temp, index.AsRegister<Register>(), Shift::LSL, 1)); + __ Bind(&done); + } else { + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + } } break; } @@ -4778,7 +4820,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. - } else { + } else if (!maybe_compressed_char_at) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5068,6 +5110,10 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. 
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ bic(out, out, ShifterOperand(1u << 31)); + } } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 5d002674d8..969d653f97 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2101,7 +2101,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); Location out = locations->Out(); uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); - + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. @@ -2119,9 +2120,28 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { // General case. 
MemOperand source = HeapOperand(obj); + Register length; + if (maybe_compressed_char_at) { + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + length = temps.AcquireW(); + __ Ldr(length, HeapOperand(obj, count_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + Int64ConstantFrom(index))); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + __ Bind(&done); + } else { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); + } } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { @@ -2139,11 +2159,24 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { __ Add(temp, obj, offset); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 0)); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 1)); + __ Bind(&done); + } else { + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + } + if (!maybe_compressed_char_at) { + codegen_->Load(type, OutputCPURegister(instruction), source); + 
codegen_->MaybeRecordImplicitNullCheck(instruction); } - - codegen_->Load(type, OutputCPURegister(instruction), source); - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { static_assert( @@ -2167,9 +2200,14 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + vixl::aarch64::Register out = OutputRegister(instruction); BlockPoolsScope block_pools(GetVIXLAssembler()); - __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset)); + __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX))); + } } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { @@ -2361,7 +2399,6 @@ void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); - __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); __ B(slow_path->GetEntryLabel(), hs); } diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 495f3fd232..56e4c7a9c2 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -44,6 +44,14 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + // TODO: Implement reading (length + compression) for String compression feature from + // 
negative offset (count_offset - data_offset). Thumb2Assembler does not support T4 + // encoding of "LDR (immediate)" at the moment. + // Don't move array pointer if it is charAt because we need to take the count first. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } + if (type == Primitive::kPrimLong || type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6d107d571f..d0dd650024 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -140,6 +140,13 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + // Don't move the array pointer if it is charAt because we need to take the count first. + // TODO: Implement reading (length + compression) for String compression feature from + // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary. + // Note that "LDR (Immediate)" does not have a "signed offset" encoding. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } if (TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index fd2da1004b..96a6ecbee9 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1039,6 +1039,11 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. 
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1053,10 +1058,16 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = locations->GetTemp(0).AsRegister<Register>(); Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); + Register temp3, temp4; + if (mirror::kUseStringCompression) { + temp3 = locations->GetTemp(3).AsRegister<Register>(); + temp4 = locations->GetTemp(4).AsRegister<Register>(); + } Label loop; Label find_char_diff; Label end; + Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1077,20 +1088,40 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ subs(out, str, ShifterOperand(arg)); __ b(&end, EQ); - // Load lengths of this and argument strings. - __ ldr(temp2, Address(str, count_offset)); - __ ldr(temp1, Address(arg, count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ ldr(temp3, Address(str, count_offset)); + __ ldr(temp4, Address(arg, count_offset)); + // Clean out compression flag from lengths. + __ bic(temp0, temp3, ShifterOperand(0x80000000)); + __ bic(IP, temp4, ShifterOperand(0x80000000)); + } else { + // Load lengths of this and argument strings. + __ ldr(temp0, Address(str, count_offset)); + __ ldr(IP, Address(arg, count_offset)); + } // out = length diff. - __ subs(out, temp2, ShifterOperand(temp1)); + __ subs(out, temp0, ShifterOperand(IP)); // temp0 = min(len(str), len(arg)). 
- __ it(Condition::LT, kItElse); - __ mov(temp0, ShifterOperand(temp2), Condition::LT); - __ mov(temp0, ShifterOperand(temp1), Condition::GE); + __ it(GT); + __ mov(temp0, ShifterOperand(IP), GT); // Shorter string is empty? __ CompareAndBranchIfZero(temp0, &end); + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ eors(temp3, temp3, ShifterOperand(temp4)); + __ b(&different_compression, MI); + } // Store offset of string value in preparation for comparison loop. __ mov(temp1, ShifterOperand(value_offset)); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. + __ cmp(temp4, ShifterOperand(0)); + __ it(GE); + __ add(temp0, temp0, ShifterOperand(temp0), GE); + } // Assertions that must hold in order to compare multiple characters at a time. CHECK_ALIGNED(value_offset, 8); @@ -1100,6 +1131,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); + Label find_char_diff_2nd_cmp; // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). __ Bind(&loop); __ ldr(IP, Address(str, temp1)); @@ -1107,43 +1139,113 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ cmp(IP, ShifterOperand(temp2)); __ b(&find_char_diff, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ sub(temp0, temp0, ShifterOperand(2)); __ ldr(IP, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); __ cmp(IP, ShifterOperand(temp2)); - __ b(&find_char_diff, NE); + __ b(&find_char_diff_2nd_cmp, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ subs(temp0, temp0, ShifterOperand(2)); - - __ b(&loop, GT); + // With string compression, we have compared 8 bytes, otherwise 4 chars. 
+ __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4)); + __ b(&loop, HI); __ b(&end); - // Find the single 16-bit character difference. + __ Bind(&find_char_diff_2nd_cmp); + if (mirror::kUseStringCompression) { + __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared. + __ b(&end, LS); // Was the second comparison fully beyond the end? + } else { + // Without string compression, we can start treating temp0 as signed + // and rely on the signed comparison below. + __ sub(temp0, temp0, ShifterOperand(2)); + } + + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. __ eor(temp1, temp2, ShifterOperand(IP)); __ rbit(temp1, temp1); __ clz(temp1, temp1); - // temp0 = number of 16-bit characters remaining to compare. - // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and - // after the end of the shorter string data). - - // (temp1 >> 4) = character where difference occurs between the last two words compared, on the - // interval [0,1] (0 for low half-word different, 1 for high half-word different). - - // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just - // return length diff (out). - __ cmp(temp0, ShifterOperand(temp1, LSR, 4)); - __ b(&end, LE); + // temp0 = number of characters remaining to compare. + // (Without string compression, it could be < 1 if a difference is found by the second CMP + // in the comparison loop, and after the end of the shorter string data). + + // Without string compression (temp1 >> 4) = character where difference occurs between the last + // two words compared, in the interval [0,1]. + // (0 for low half-word different, 1 for high half-word different). + // With string compression, (temp1 >> 3) = byte where the difference occurs, + // in the interval [0,3]. + + // If temp0 <= (temp1 >> (kUseStringCompression ? 
3 : 4)), the difference occurs outside + // the remaining string data, so just return length diff (out). + // The comparison is unsigned for string compression, otherwise signed. + __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4)); + __ b(&end, mirror::kUseStringCompression ? LS : LE); // Extract the characters and calculate the difference. + Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string, GE); + __ bic(temp1, temp1, ShifterOperand(0x7)); + __ b(&continue_process); + } + __ Bind(&uncompressed_string); __ bic(temp1, temp1, ShifterOperand(0xf)); + __ Bind(&continue_process); + __ Lsr(temp2, temp2, temp1); __ Lsr(IP, IP, temp1); + Label calculate_difference, uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string_extract_chars, GE); + __ ubfx(temp2, temp2, 0, 8); + __ ubfx(IP, IP, 0, 8); + __ b(&calculate_difference); + } + __ Bind(&uncompressed_string_extract_chars); __ movt(temp2, 0); __ movt(IP, 0); + __ Bind(&calculate_difference); __ sub(out, IP, ShifterOperand(temp2)); + __ b(&end); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + Label loop_arg_compressed, loop_this_compressed, find_diff; + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. 
+ __ Bind(&different_compression); + __ add(temp2, str, ShifterOperand(value_offset)); + __ add(temp3, arg, ShifterOperand(value_offset)); + __ cmp(temp4, ShifterOperand(0)); + __ b(&loop_arg_compressed, LT); + + __ Bind(&loop_this_compressed); + __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex)); + __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_this_compressed, GT); + __ b(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ ldrh(IP, Address(temp2, char_size, Address::PostIndex)); + __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_arg_compressed, GT); + __ b(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ sub(out, IP, ShifterOperand(temp4)); + } __ Bind(&end); @@ -1180,7 +1282,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); - Label loop; + Label loop, preloop; Label end; Label return_true; Label return_false; @@ -1214,11 +1316,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { __ ldr(temp, Address(str, count_offset)); __ ldr(temp1, Address(arg, count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. __ cmp(temp, ShifterOperand(temp1)); __ b(&return_false, NE); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ bic(temp, temp, ShifterOperand(0x80000000)); + } __ cbz(temp, &return_true); - // Reference equality check, return true if same reference. 
__ cmp(str, ShifterOperand(arg)); __ b(&return_true, EQ); @@ -1227,10 +1333,19 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); - __ LoadImmediate(temp1, value_offset); - + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ cmp(temp1, ShifterOperand(0)); + __ b(&preloop, GT); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2. + __ add(temp, temp, ShifterOperand(1)); + __ Lsr(temp, temp, 1); + __ Bind(&preloop); + } // Loop to compare strings 2 characters at a time starting at the front of the string. // Ok to do this because strings with an odd length are zero-padded. + __ LoadImmediate(temp1, value_offset); __ Bind(&loop); __ ldr(out, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); @@ -2330,22 +2445,31 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = locations->GetTemp(1).AsRegister<Register>(); Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); - // src range to copy. - __ add(src_ptr, srcObj, ShifterOperand(value_offset)); - __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); - + Label done, compressed_string_loop; // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); - - // Do the copy. - Label loop, remainder, done; - // Early out for valid zero-length retrievals. __ b(&done, EQ); + // src range to copy. + __ add(src_ptr, srcObj, ShifterOperand(value_offset)); + Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. 
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. + __ ldr(IP, Address(srcObj, count_offset)); + __ cmp(IP, ShifterOperand(0)); + __ b(&compressed_string_preloop, LT); + } + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); + + // Do the copy. + Label loop, remainder; + // Save repairing the value of num_chr on the < 4 character path. __ subs(IP, num_chr, ShifterOperand(4)); __ b(&remainder, LT); @@ -2374,6 +2498,20 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ subs(num_chr, num_chr, ShifterOperand(1)); __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); __ b(&remainder, GT); + __ b(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_preloop); + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin)); + __ Bind(&compressed_string_loop); + __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex)); + __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); + __ subs(num_chr, num_chr, ShifterOperand(1)); + __ b(&compressed_string_loop, GT); + } __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ce58657bcd..e2c1802fdc 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1223,6 +1223,11 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. 
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1239,10 +1244,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = WRegisterFrom(locations->GetTemp(0)); Register temp1 = WRegisterFrom(locations->GetTemp(1)); Register temp2 = WRegisterFrom(locations->GetTemp(2)); + Register temp3, temp5; + if (mirror::kUseStringCompression) { + temp3 = WRegisterFrom(locations->GetTemp(3)); + temp5 = WRegisterFrom(locations->GetTemp(4)); + } vixl::aarch64::Label loop; vixl::aarch64::Label find_char_diff; vixl::aarch64::Label end; + vixl::aarch64::Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1263,9 +1274,18 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ Subs(out, str, arg); __ B(&end, eq); - // Load lengths of this and argument strings. - __ Ldr(temp0, HeapOperand(str, count_offset)); - __ Ldr(temp1, HeapOperand(arg, count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ Ldr(temp3, HeapOperand(str, count_offset)); + __ Ldr(temp5, HeapOperand(arg, count_offset)); + // Clean out compression flag from lengths. + __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000))); + __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000))); + } else { + // Load lengths of this and argument strings. + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); + } // Return zero if both strings are empty. 
__ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1276,8 +1296,22 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Shorter string is empty? __ Cbz(temp2, &end); + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ Eor(temp3.W(), temp3, Operand(temp5)); + __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression); + } // Store offset of string value in preparation for comparison loop. __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp2 as unsigned. + vixl::aarch64::Label let_it_signed; + __ Cmp(temp5, Operand(0)); + __ B(lt, &let_it_signed); + __ Add(temp2, temp2, Operand(temp2)); + __ Bind(&let_it_signed); + } UseScratchRegisterScope scratch_scope(masm); Register temp4 = scratch_scope.AcquireX(); @@ -1299,29 +1333,90 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); - __ Subs(temp2, temp2, 4); - __ B(gt, &loop); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4); + __ B(hi, &loop); __ B(&end); // Promote temp1 to an X reg, ready for EOR. temp1 = temp1.X(); - // Find the single 16-bit character difference. + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. __ Eor(temp1, temp0, temp4); __ Rbit(temp1, temp1); __ Clz(temp1, temp1); - // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then + // If the number of chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). 
- __ Cmp(temp2, Operand(temp1.W(), LSR, 4)); - __ B(le, &end); + // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the + // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or + // unsigned when string compression is disabled. + // When it's enabled, the comparison must be unsigned. + __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); + __ B(ls, &end); // Extract the characters and calculate the difference. + vixl::aarch64::Label uncompressed_string, continue_process; + if (mirror:: kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string); + __ Bic(temp1, temp1, 0x7); + __ B(&continue_process); + } + __ Bind(&uncompressed_string); __ Bic(temp1, temp1, 0xf); + __ Bind(&continue_process); + __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); + vixl::aarch64::Label uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars); + __ And(temp4, temp4, 0xff); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB)); + __ B(&end); + } + __ Bind(&uncompressed_string_extract_chars); __ And(temp4, temp4, 0xffff); __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); + __ B(&end); + + if (mirror::kUseStringCompression) { + vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff; + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + temp0 = temp0.W(); + temp1 = temp1.W(); + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. 
+ __ Bind(&different_compression); + __ Add(temp0, str, Operand(value_offset)); + __ Add(temp1, arg, Operand(value_offset)); + __ Cmp(temp5, Operand(0)); + __ B(lt, &loop_arg_compressed); + + __ Bind(&loop_this_compressed); + __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex)); + __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_this_compressed); + __ B(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex)); + __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_arg_compressed); + __ B(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH)); + } __ Bind(&end); @@ -1356,7 +1451,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { Register temp1 = WRegisterFrom(locations->GetTemp(0)); Register temp2 = WRegisterFrom(locations->GetTemp(1)); - vixl::aarch64::Label loop; + vixl::aarch64::Label loop, preloop; vixl::aarch64::Label end; vixl::aarch64::Label return_true; vixl::aarch64::Label return_false; @@ -1394,22 +1489,37 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str.X(), count_offset)); __ Ldr(temp1, MemOperand(arg.X(), count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. __ Cmp(temp, temp1); __ B(&return_false, ne); - // Store offset of string value in preparation for comparison loop - __ Mov(temp1, value_offset); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. 
+ __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000))); + } __ Cbz(temp, &return_true); // Assertions that must hold in order to compare strings 4 characters at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ Cmp(temp1, Operand(0)); + __ B(&preloop, gt); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2 + __ Add(temp, temp, 1); + __ Lsr(temp, temp, 1); + } + temp1 = temp1.X(); temp2 = temp2.X(); - // Loop to compare strings 4 characters at a time starting at the beginning of the string. // Ok to do this because strings are zero-padded to be 8-byte aligned. + // Store offset of string value in preparation for comparison loop + __ Bind(&preloop); + __ Mov(temp1, value_offset); __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); @@ -1773,6 +1883,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary register for String compression feature. 
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1800,29 +1914,41 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = XRegisterFrom(locations->GetTemp(0)); Register num_chr = XRegisterFrom(locations->GetTemp(1)); Register tmp1 = XRegisterFrom(locations->GetTemp(2)); + Register tmp3; + if (mirror::kUseStringCompression) { + tmp3 = WRegisterFrom(locations->GetTemp(3)); + } UseScratchRegisterScope temps(masm); Register dst_ptr = temps.AcquireX(); Register tmp2 = temps.AcquireX(); - // src address to copy from. - __ Add(src_ptr, srcObj, Operand(value_offset)); - __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); + vixl::aarch64::Label done; + vixl::aarch64::Label compressed_string_loop; + __ Sub(num_chr, srcEnd, srcBegin); + // Early out for valid zero-length retrievals. + __ Cbz(num_chr, &done); // dst address start to copy to. __ Add(dst_ptr, dstObj, Operand(data_offset)); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); - __ Sub(num_chr, srcEnd, srcBegin); + // src address to copy from. + __ Add(src_ptr, srcObj, Operand(value_offset)); + vixl::aarch64::Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. + __ Ldr(tmp3, MemOperand(srcObj, count_offset)); + __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop); + } + __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); // Do the copy. vixl::aarch64::Label loop; - vixl::aarch64::Label done; vixl::aarch64::Label remainder; - // Early out for valid zero-length retrievals. - __ Cbz(num_chr, &done); - // Save repairing the value of num_chr on the < 8 character path. 
__ Subs(tmp1, num_chr, 8); __ B(lt, &remainder); @@ -1848,6 +1974,20 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Subs(num_chr, num_chr, 1); __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); __ B(gt, &remainder); + __ B(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + __ Bind(&compressed_string_preloop); + __ Add(src_ptr, src_ptr, Operand(srcBegin)); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_loop); + __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); + __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); + __ Subs(num_chr, num_chr, Operand(1)); + __ B(gt, &compressed_string_loop); + } __ Bind(&done); } |