diff options
Diffstat (limited to 'compiler/optimizing')
30 files changed, 819 insertions, 806 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 55e122150e..681988d2ac 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -4589,7 +4589,9 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { } // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // Also need for String compression feature. + if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) + || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { locations->AddTemp(Location::RequiresRegister()); } } @@ -4602,6 +4604,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. @@ -4615,10 +4619,31 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: { if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); - uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + Register length = IP; + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ b(&uncompressed_load, GE); + __ LoadFromOffset(kLoadUnsignedByte, + out_loc.AsRegister<Register>(), + obj, + data_offset + const_index); + __ b(&done); + __ Bind(&uncompressed_load); + __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), + out_loc.AsRegister<Register>(), + obj, + data_offset + (const_index << 1)); + __ Bind(&done); + } else { + uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); - LoadOperandType load_type = GetLoadOperandType(type); - __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + LoadOperandType load_type = GetLoadOperandType(type); + __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); + } } else { Register temp = IP; @@ -4634,7 +4659,24 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } else { __ add(temp, obj, ShifterOperand(data_offset)); } - codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + if (maybe_compressed_char_at) { + Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Register length = locations->GetTemp(0).AsRegister<Register>(); + __ LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ cmp(length, ShifterOperand(0)); + __ b(&uncompressed_load, GE); + __ ldrb(out_loc.AsRegister<Register>(), + Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); + __ b(&done); + __ Bind(&uncompressed_load); + __ ldrh(out_loc.AsRegister<Register>(), + 
Address(temp, index.AsRegister<Register>(), Shift::LSL, 1)); + __ Bind(&done); + } else { + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); + } } break; } @@ -4734,7 +4776,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. - } else { + } else if (!maybe_compressed_char_at) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5024,6 +5066,10 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ bic(out, out, ShifterOperand(1u << 31)); + } } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index a2a2e426b6..4f7f36bb5a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2052,7 +2052,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); Location out = locations->Out(); uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); - + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. @@ -2070,9 +2071,28 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { // General case. 
MemOperand source = HeapOperand(obj); + Register length; + if (maybe_compressed_char_at) { + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + length = temps.AcquireW(); + __ Ldr(length, HeapOperand(obj, count_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + Int64ConstantFrom(index))); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + __ Bind(&done); + } else { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); + } } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { @@ -2090,11 +2110,24 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { __ Add(temp, obj, offset); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + if (maybe_compressed_char_at) { + vixl::aarch64::Label uncompressed_load, done; + __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load); + __ Ldrb(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 0)); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(Register(OutputCPURegister(instruction)), + HeapOperand(temp, XRegisterFrom(index), LSL, 1)); + __ Bind(&done); + } else { + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + } + if (!maybe_compressed_char_at) { + codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); } - - codegen_->Load(type, OutputCPURegister(instruction), source); - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { static_assert( @@ -2118,9 +2151,14 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + vixl::aarch64::Register out = OutputRegister(instruction); BlockPoolsScope block_pools(GetVIXLAssembler()); - __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset)); + __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. 
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX))); + } } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { @@ -2312,7 +2350,6 @@ void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); - __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); __ B(slow_path->GetEntryLabel(), hs); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c3000805d1..a7051aeeb1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -150,6 +150,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<Register>(), array_len); + if (mirror::kUseStringCompression) { + __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX)); + } } x86_codegen->EmitParallelMoves( locations->InAt(0), @@ -5021,7 +5024,23 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { Register out = out_loc.AsRegister<Register>(); - __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Branch cases into compressed and uncompressed for each index's type. + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + NearLabel done, not_compressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ j(kGreaterEqual, ¬_compressed); + __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); + __ jmp(&done); + __ Bind(¬_compressed); + __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); + __ Bind(&done); + } else { + // Common case for charAt of array of char or when string compression's + // feature is turned off. + __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); + } break; } @@ -5359,6 +5378,10 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out most significant bit in case the array is String's array of char. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ andl(out, Immediate(INT32_MAX)); + } } void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -5372,9 +5395,15 @@ void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { if (!length->IsEmittedAtUseSite()) { locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); } + // Need register to see array's length. 
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { + const bool is_string_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); LocationSummary* locations = instruction->GetLocations(); Location index_loc = locations->InAt(0); Location length_loc = locations->InAt(1); @@ -5409,13 +5438,23 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<Register>(), len_offset); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(array_len, Immediate(value)); + if (is_string_compressed_char_at) { + Register length_reg = locations->GetTemp(0).AsRegister<Register>(); + __ movl(length_reg, array_len); + codegen_->MaybeRecordImplicitNullCheck(array_length); + __ andl(length_reg, Immediate(INT32_MAX)); + codegen_->GenerateIntCompare(length_reg, index_loc); } else { - __ cmpl(array_len, index_loc.AsRegister<Register>()); + // Checking bounds for general case: + // Array of char or string's array with feature compression off. + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(array_len, Immediate(value)); + } else { + __ cmpl(array_len, index_loc.AsRegister<Register>()); + } + codegen_->MaybeRecordImplicitNullCheck(array_length); } - codegen_->MaybeRecordImplicitNullCheck(array_length); } else { codegen_->GenerateIntCompare(length_loc, index_loc); } @@ -7278,13 +7317,17 @@ void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) { void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { Register lhs_reg = lhs.AsRegister<Register>(); + GenerateIntCompare(lhs_reg, rhs); +} + +void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) { if (rhs.IsConstant()) { int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - Compare32BitValue(lhs_reg, value); + Compare32BitValue(lhs, value); } else if (rhs.IsStackSlot()) { - __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); + __ cmpl(lhs, Address(ESP, rhs.GetStackIndex())); } else { - __ cmpl(lhs_reg, rhs.AsRegister<Register>()); + __ cmpl(lhs, rhs.AsRegister<Register>()); } } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 1ae9af3b94..1bd28da178 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -474,6 +474,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Compare int values. Supports only register locations for `lhs`. void GenerateIntCompare(Location lhs, Location rhs); + void GenerateIntCompare(Register lhs, Location rhs); // Construct address for array access. 
static Address ArrayAddress(Register obj, diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f9a3e429d7..b243ee0c59 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -198,6 +198,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<CpuRegister>(), array_len); + if (mirror::kUseStringCompression) { + __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX)); + } } // We're moving two locations to locations that could overlap, so we need a parallel @@ -4485,7 +4488,21 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); - __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Branch cases into compressed and uncompressed for each index's type. + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + NearLabel done, not_compressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ j(kGreaterEqual, ¬_compressed); + __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); + __ jmp(&done); + __ Bind(¬_compressed); + __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); + __ Bind(&done); + } else { + __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); + } break; } @@ -4807,6 +4824,10 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out most significant bit in case the array is String's array of char. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ andl(out, Immediate(INT32_MAX)); + } } void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -4856,13 +4877,23 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(array_len, Immediate(value)); + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + CpuRegister length_reg = CpuRegister(TMP); + __ movl(length_reg, array_len); + codegen_->MaybeRecordImplicitNullCheck(array_length); + __ andl(length_reg, Immediate(INT32_MAX)); + codegen_->GenerateIntCompare(length_reg, index_loc); } else { - __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); + // Checking the bound for general case: + // Array of char or String's array when the compression feature off. 
+ if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(array_len, Immediate(value)); + } else { + __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); + } + codegen_->MaybeRecordImplicitNullCheck(array_length); } - codegen_->MaybeRecordImplicitNullCheck(array_length); } else { codegen_->GenerateIntCompare(length_loc, index_loc); } @@ -6525,13 +6556,17 @@ void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) { void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) { CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>(); + GenerateIntCompare(lhs_reg, rhs); +} + +void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) { if (rhs.IsConstant()) { int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - Compare32BitValue(lhs_reg, value); + Compare32BitValue(lhs, value); } else if (rhs.IsStackSlot()) { - __ cmpl(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); + __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex())); } else { - __ cmpl(lhs_reg, rhs.AsRegister<CpuRegister>()); + __ cmpl(lhs, rhs.AsRegister<CpuRegister>()); } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 594f05157b..8dec44eb03 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -510,8 +510,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Compare32BitValue(CpuRegister dest, int32_t value); void Compare64BitValue(CpuRegister dest, int64_t value); - // Compare int values. Supports only register locations for `lhs`. + // Compare int values. Supports register locations for `lhs`. void GenerateIntCompare(Location lhs, Location rhs); + void GenerateIntCompare(CpuRegister lhs, Location rhs); // Compare long values. Supports only register locations for `lhs`. 
void GenerateLongCompare(Location lhs, Location rhs); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index f21dc0e7e4..af2fe9cb1f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -41,7 +41,7 @@ #include "sharpening.h" #include "ssa_builder.h" #include "ssa_phi_elimination.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" namespace art { @@ -1321,7 +1321,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, HConstantFolding fold(callee_graph); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); - IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_); + IntrinsicsRecognizer intrinsics(callee_graph, stats_); HOptimization* optimizations[] = { &intrinsics, diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 3b08d9f989..f7d67db5b2 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -22,7 +22,7 @@ #include "dex_instruction-inl.h" #include "driver/compiler_options.h" #include "imtable-inl.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { @@ -675,7 +675,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); // We fetch the referenced class eagerly (that is, the class pointed by in the MethodId // at method_idx), as `CanAccessResolvedMethod` expects it be be in the dex cache. 
@@ -1284,7 +1284,7 @@ static mirror::Class* GetClassFrom(CompilerDriver* driver, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(compilation_unit.GetClassLoader()))); Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); @@ -1303,7 +1303,7 @@ bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const { StackHandleScope<3> hs(soa.Self()); Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); @@ -1344,7 +1344,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, StackHandleScope<3> hs(soa.Self()); Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); ArtField* resolved_field = compiler_driver_->ResolveField( soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index ff829af4c2..3bb1c1dc21 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,7 +18,7 @@ #include "intrinsics.h" #include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 495f3fd232..56e4c7a9c2 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -44,6 +44,14 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + // TODO: Implement reading (length + compression) for String compression feature from + // negative offset (count_offset - data_offset). Thumb2Assembler does not support T4 + // encoding of "LDR (immediate)" at the moment. + // Don't move array pointer if it is charAt because we need to take the count first. 
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } + if (type == Primitive::kPrimLong || type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6d107d571f..d0dd650024 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -140,6 +140,13 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + // Don't move the array pointer if it is charAt because we need to take the count first. + // TODO: Implement reading (length + compression) for String compression feature from + // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary. + // Note that "LDR (Immediate)" does not have a "signed offset" encoding. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + return; + } if (TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 4d4bbcf616..412ccfcf4f 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -18,14 +18,11 @@ #include "art_method.h" #include "class_linker.h" -#include "dex/quick/dex_file_method_inliner.h" -#include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" #include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" -#include "quick/inline_method_analyser.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" #include "thread-inl.h" #include "utils.h" @@ -36,7 +33,7 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kInterface; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return IsStatic; #include "intrinsics_list.h" @@ -52,7 +49,7 @@ static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsic switch (i) { case Intrinsics::kNone: return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return NeedsEnvironmentOrCache; #include "intrinsics_list.h" @@ -68,7 +65,7 @@ static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kAllSideEffects; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ case Intrinsics::k ## Name: \ return SideEffects; #include "intrinsics_list.h" @@ -84,7 +81,7 @@ static inline IntrinsicExceptions GetExceptions(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kCanThrow; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return Exceptions; #include "intrinsics_list.h" @@ -95,430 +92,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) return kCanThrow; } -static Primitive::Type GetType(uint64_t data, bool is_op_size) { - if (is_op_size) { - switch (static_cast<OpSize>(data)) { - case kSignedByte: - return Primitive::kPrimByte; - case kSignedHalf: - return Primitive::kPrimShort; - case k32: - return Primitive::kPrimInt; - case k64: - return Primitive::kPrimLong; - default: - LOG(FATAL) << "Unknown/unsupported op size " << data; - UNREACHABLE(); - } - } else { - if ((data & kIntrinsicFlagIsLong) != 0) { - return Primitive::kPrimLong; - } - if ((data & kIntrinsicFlagIsObject) != 0) { - return Primitive::kPrimNot; - } - return Primitive::kPrimInt; - } -} - -static Intrinsics GetIntrinsic(InlineMethod method) { - switch (method.opcode) { - // Floating-point conversions. - case kIntrinsicDoubleCvt: - return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? - Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble; - case kIntrinsicFloatCvt: - return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? - Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat; - case kIntrinsicFloat2Int: - return Intrinsics::kFloatFloatToIntBits; - case kIntrinsicDouble2Long: - return Intrinsics::kDoubleDoubleToLongBits; - - // Floating-point tests. - case kIntrinsicFloatIsInfinite: - return Intrinsics::kFloatIsInfinite; - case kIntrinsicDoubleIsInfinite: - return Intrinsics::kDoubleIsInfinite; - case kIntrinsicFloatIsNaN: - return Intrinsics::kFloatIsNaN; - case kIntrinsicDoubleIsNaN: - return Intrinsics::kDoubleIsNaN; - - // Bit manipulations. - case kIntrinsicReverseBits: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerReverse; - case Primitive::kPrimLong: - return Intrinsics::kLongReverse; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicReverseBytes: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimShort: - return Intrinsics::kShortReverseBytes; - case Primitive::kPrimInt: - return Intrinsics::kIntegerReverseBytes; - case Primitive::kPrimLong: - return Intrinsics::kLongReverseBytes; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicRotateRight: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerRotateRight; - case Primitive::kPrimLong: - return Intrinsics::kLongRotateRight; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicRotateLeft: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerRotateLeft; - case Primitive::kPrimLong: - return Intrinsics::kLongRotateLeft; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Misc data processing. 
- case kIntrinsicBitCount: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerBitCount; - case Primitive::kPrimLong: - return Intrinsics::kLongBitCount; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicCompare: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerCompare; - case Primitive::kPrimLong: - return Intrinsics::kLongCompare; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicHighestOneBit: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerHighestOneBit; - case Primitive::kPrimLong: - return Intrinsics::kLongHighestOneBit; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicLowestOneBit: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerLowestOneBit; - case Primitive::kPrimLong: - return Intrinsics::kLongLowestOneBit; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicNumberOfLeadingZeros: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerNumberOfLeadingZeros; - case Primitive::kPrimLong: - return Intrinsics::kLongNumberOfLeadingZeros; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicNumberOfTrailingZeros: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerNumberOfTrailingZeros; - case Primitive::kPrimLong: - return Intrinsics::kLongNumberOfTrailingZeros; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicSignum: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimInt: - return Intrinsics::kIntegerSignum; - case Primitive::kPrimLong: - return Intrinsics::kLongSignum; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Abs. - case kIntrinsicAbsDouble: - return Intrinsics::kMathAbsDouble; - case kIntrinsicAbsFloat: - return Intrinsics::kMathAbsFloat; - case kIntrinsicAbsInt: - return Intrinsics::kMathAbsInt; - case kIntrinsicAbsLong: - return Intrinsics::kMathAbsLong; - - // Min/max. - case kIntrinsicMinMaxDouble: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble; - case kIntrinsicMinMaxFloat: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat; - case kIntrinsicMinMaxInt: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt; - case kIntrinsicMinMaxLong: - return ((method.d.data & kIntrinsicFlagMin) == 0) ? - Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; - - // More math builtins. 
- case kIntrinsicCos: - return Intrinsics::kMathCos; - case kIntrinsicSin: - return Intrinsics::kMathSin; - case kIntrinsicAcos: - return Intrinsics::kMathAcos; - case kIntrinsicAsin: - return Intrinsics::kMathAsin; - case kIntrinsicAtan: - return Intrinsics::kMathAtan; - case kIntrinsicAtan2: - return Intrinsics::kMathAtan2; - case kIntrinsicCbrt: - return Intrinsics::kMathCbrt; - case kIntrinsicCosh: - return Intrinsics::kMathCosh; - case kIntrinsicExp: - return Intrinsics::kMathExp; - case kIntrinsicExpm1: - return Intrinsics::kMathExpm1; - case kIntrinsicHypot: - return Intrinsics::kMathHypot; - case kIntrinsicLog: - return Intrinsics::kMathLog; - case kIntrinsicLog10: - return Intrinsics::kMathLog10; - case kIntrinsicNextAfter: - return Intrinsics::kMathNextAfter; - case kIntrinsicSinh: - return Intrinsics::kMathSinh; - case kIntrinsicTan: - return Intrinsics::kMathTan; - case kIntrinsicTanh: - return Intrinsics::kMathTanh; - - // Misc math. - case kIntrinsicSqrt: - return Intrinsics::kMathSqrt; - case kIntrinsicCeil: - return Intrinsics::kMathCeil; - case kIntrinsicFloor: - return Intrinsics::kMathFloor; - case kIntrinsicRint: - return Intrinsics::kMathRint; - case kIntrinsicRoundDouble: - return Intrinsics::kMathRoundDouble; - case kIntrinsicRoundFloat: - return Intrinsics::kMathRoundFloat; - - // System.arraycopy. - case kIntrinsicSystemArrayCopyCharArray: - return Intrinsics::kSystemArrayCopyChar; - - case kIntrinsicSystemArrayCopy: - return Intrinsics::kSystemArrayCopy; - - // Thread.currentThread. - case kIntrinsicCurrentThread: - return Intrinsics::kThreadCurrentThread; - - // Memory.peek. - case kIntrinsicPeek: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimByte: - return Intrinsics::kMemoryPeekByte; - case Primitive::kPrimShort: - return Intrinsics::kMemoryPeekShortNative; - case Primitive::kPrimInt: - return Intrinsics::kMemoryPeekIntNative; - case Primitive::kPrimLong: - return Intrinsics::kMemoryPeekLongNative; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // Memory.poke. - case kIntrinsicPoke: - switch (GetType(method.d.data, true)) { - case Primitive::kPrimByte: - return Intrinsics::kMemoryPokeByte; - case Primitive::kPrimShort: - return Intrinsics::kMemoryPokeShortNative; - case Primitive::kPrimInt: - return Intrinsics::kMemoryPokeIntNative; - case Primitive::kPrimLong: - return Intrinsics::kMemoryPokeLongNative; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - - // String. - case kIntrinsicCharAt: - return Intrinsics::kStringCharAt; - case kIntrinsicCompareTo: - return Intrinsics::kStringCompareTo; - case kIntrinsicEquals: - return Intrinsics::kStringEquals; - case kIntrinsicGetCharsNoCheck: - return Intrinsics::kStringGetCharsNoCheck; - case kIntrinsicIsEmptyOrLength: - return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? - Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; - case kIntrinsicIndexOf: - return ((method.d.data & kIntrinsicFlagBase0) == 0) ? 
- Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; - case kIntrinsicNewStringFromBytes: - return Intrinsics::kStringNewStringFromBytes; - case kIntrinsicNewStringFromChars: - return Intrinsics::kStringNewStringFromChars; - case kIntrinsicNewStringFromString: - return Intrinsics::kStringNewStringFromString; - - case kIntrinsicCas: - switch (GetType(method.d.data, false)) { - case Primitive::kPrimNot: - return Intrinsics::kUnsafeCASObject; - case Primitive::kPrimInt: - return Intrinsics::kUnsafeCASInt; - case Primitive::kPrimLong: - return Intrinsics::kUnsafeCASLong; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - case kIntrinsicUnsafeGet: { - const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); - switch (GetType(method.d.data, false)) { - case Primitive::kPrimInt: - return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet; - case Primitive::kPrimLong: - return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong; - case Primitive::kPrimNot: - return is_volatile ? Intrinsics::kUnsafeGetObjectVolatile : Intrinsics::kUnsafeGetObject; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - } - case kIntrinsicUnsafePut: { - enum Sync { kNoSync, kVolatile, kOrdered }; - const Sync sync = - ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile : - ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered : - kNoSync; - switch (GetType(method.d.data, false)) { - case Primitive::kPrimInt: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePut; - case kVolatile: - return Intrinsics::kUnsafePutVolatile; - case kOrdered: - return Intrinsics::kUnsafePutOrdered; - } - break; - case Primitive::kPrimLong: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePutLong; - case kVolatile: - return Intrinsics::kUnsafePutLongVolatile; - case kOrdered: - return Intrinsics::kUnsafePutLongOrdered; - } - break; - case Primitive::kPrimNot: - switch (sync) { - case kNoSync: - return Intrinsics::kUnsafePutObject; - case kVolatile: - return Intrinsics::kUnsafePutObjectVolatile; - case kOrdered: - return Intrinsics::kUnsafePutObjectOrdered; - } - break; - default: - LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; - UNREACHABLE(); - } - break; - } - - // 1.8. - case kIntrinsicUnsafeGetAndAddInt: - return Intrinsics::kUnsafeGetAndAddInt; - case kIntrinsicUnsafeGetAndAddLong: - return Intrinsics::kUnsafeGetAndAddLong; - case kIntrinsicUnsafeGetAndSetInt: - return Intrinsics::kUnsafeGetAndSetInt; - case kIntrinsicUnsafeGetAndSetLong: - return Intrinsics::kUnsafeGetAndSetLong; - case kIntrinsicUnsafeGetAndSetObject: - return Intrinsics::kUnsafeGetAndSetObject; - case kIntrinsicUnsafeLoadFence: - return Intrinsics::kUnsafeLoadFence; - case kIntrinsicUnsafeStoreFence: - return Intrinsics::kUnsafeStoreFence; - case kIntrinsicUnsafeFullFence: - return Intrinsics::kUnsafeFullFence; - - // Virtual cases. - - case kIntrinsicReferenceGetReferent: - return Intrinsics::kReferenceGetReferent; - - // Quick inliner cases. Remove after refactoring. They are here so that we can use the - // compiler to warn on missing cases. - - case kInlineOpNop: - case kInlineOpReturnArg: - case kInlineOpNonWideConst: - case kInlineOpIGet: - case kInlineOpIPut: - case kInlineOpConstructor: - return Intrinsics::kNone; - - // String init cases, not intrinsics. 
- - case kInlineStringInit: - return Intrinsics::kNone; - - // No default case to make the compiler warn on missing cases. - } - return Intrinsics::kNone; -} - -static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile& dex_file) { - // The DexFileMethodInliner should have checked whether the methods are agreeing with - // what we expect, i.e., static methods are called as such. Add another check here for - // our expectations: - // +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual. // // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization @@ -542,13 +116,9 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile return true; } if (invoke_type == kVirtual) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtMethod* art_method = invoke->GetResolvedMethod(); ScopedObjectAccess soa(Thread::Current()); - ArtMethod* art_method = - class_linker->FindDexCache(soa.Self(), dex_file)->GetResolvedMethod( - invoke->GetDexMethodIndex(), class_linker->GetImagePointerSize()); - return art_method != nullptr && - (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); + return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); } return false; @@ -561,8 +131,8 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile } } -// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. void IntrinsicsRecognizer::Run() { + ScopedObjectAccess soa(Thread::Current()); for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); @@ -570,26 +140,20 @@ void IntrinsicsRecognizer::Run() { HInstruction* inst = inst_it.Current(); if (inst->IsInvoke()) { HInvoke* invoke = inst->AsInvoke(); - InlineMethod method; - const DexFile& dex_file = invoke->GetDexFile(); - DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file); - DCHECK(inliner != nullptr); - if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method); - - if (intrinsic != Intrinsics::kNone) { - if (!CheckInvokeType(intrinsic, invoke, dex_file)) { - LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " - << intrinsic << " for " - << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()) - << invoke->DebugName(); - } else { - invoke->SetIntrinsic(intrinsic, - NeedsEnvironmentOrCache(intrinsic), - GetSideEffects(intrinsic), - GetExceptions(intrinsic)); - MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); - } + ArtMethod* art_method = invoke->GetResolvedMethod(); + if (art_method != nullptr && art_method->IsIntrinsic()) { + Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); + if (!CheckInvokeType(intrinsic, invoke)) { + LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " + << intrinsic << " for " + << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()) + << invoke->DebugName(); + } else { + invoke->SetIntrinsic(intrinsic, + NeedsEnvironmentOrCache(intrinsic), + GetSideEffects(intrinsic), + GetExceptions(intrinsic)); + MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); } } } @@ -602,7 +166,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) 
{ case Intrinsics::kNone: os << "None"; break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ os << # Name; \ break; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62f731d03f..1e73cf67df 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -34,17 +34,14 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000) // Recognize intrinsics from HInvoke nodes. class IntrinsicsRecognizer : public HOptimization { public: - IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats) - : HOptimization(graph, kIntrinsicsRecognizerPassName, stats), - driver_(driver) {} + IntrinsicsRecognizer(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, kIntrinsicsRecognizerPassName, stats) {} void Run() OVERRIDE; static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; private: - CompilerDriver* driver_; - DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); }; @@ -58,7 +55,7 @@ class IntrinsicVisitor : public ValueObject { switch (invoke->GetIntrinsic()) { case Intrinsics::kNone: return; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, ...) \ case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; @@ -73,7 +70,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, ...) \ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } #include "intrinsics_list.h" diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index fd2da1004b..96a6ecbee9 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1039,6 +1039,11 @@ void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. + if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1053,10 +1058,16 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = locations->GetTemp(0).AsRegister<Register>(); Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); + Register temp3, temp4; + if (mirror::kUseStringCompression) { + temp3 = locations->GetTemp(3).AsRegister<Register>(); + temp4 = locations->GetTemp(4).AsRegister<Register>(); + } Label loop; Label find_char_diff; Label end; + Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1077,20 +1088,40 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { // Reference equality check, return 0 if same reference. __ subs(out, str, ShifterOperand(arg)); __ b(&end, EQ); - // Load lengths of this and argument strings. 
- __ ldr(temp2, Address(str, count_offset)); - __ ldr(temp1, Address(arg, count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ ldr(temp3, Address(str, count_offset)); + __ ldr(temp4, Address(arg, count_offset)); + // Clean out compression flag from lengths. + __ bic(temp0, temp3, ShifterOperand(0x80000000)); + __ bic(IP, temp4, ShifterOperand(0x80000000)); + } else { + // Load lengths of this and argument strings. + __ ldr(temp0, Address(str, count_offset)); + __ ldr(IP, Address(arg, count_offset)); + } // out = length diff. - __ subs(out, temp2, ShifterOperand(temp1)); + __ subs(out, temp0, ShifterOperand(IP)); // temp0 = min(len(str), len(arg)). - __ it(Condition::LT, kItElse); - __ mov(temp0, ShifterOperand(temp2), Condition::LT); - __ mov(temp0, ShifterOperand(temp1), Condition::GE); + __ it(GT); + __ mov(temp0, ShifterOperand(IP), GT); // Shorter string is empty? __ CompareAndBranchIfZero(temp0, &end); + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ eors(temp3, temp3, ShifterOperand(temp4)); + __ b(&different_compression, MI); + } // Store offset of string value in preparation for comparison loop. __ mov(temp1, ShifterOperand(value_offset)); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. + __ cmp(temp4, ShifterOperand(0)); + __ it(GE); + __ add(temp0, temp0, ShifterOperand(temp0), GE); + } // Assertions that must hold in order to compare multiple characters at a time. CHECK_ALIGNED(value_offset, 8); @@ -1100,6 +1131,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); + Label find_char_diff_2nd_cmp; // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). __ Bind(&loop); __ ldr(IP, Address(str, temp1)); @@ -1107,43 +1139,113 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ cmp(IP, ShifterOperand(temp2)); __ b(&find_char_diff, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ sub(temp0, temp0, ShifterOperand(2)); __ ldr(IP, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); __ cmp(IP, ShifterOperand(temp2)); - __ b(&find_char_diff, NE); + __ b(&find_char_diff_2nd_cmp, NE); __ add(temp1, temp1, ShifterOperand(char_size * 2)); - __ subs(temp0, temp0, ShifterOperand(2)); - - __ b(&loop, GT); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4)); + __ b(&loop, HI); __ b(&end); - // Find the single 16-bit character difference. + __ Bind(&find_char_diff_2nd_cmp); + if (mirror::kUseStringCompression) { + __ subs(temp0, temp0, ShifterOperand(4)); // 4 bytes previously compared. + __ b(&end, LS); // Was the second comparison fully beyond the end? + } else { + // Without string compression, we can start treating temp0 as signed + // and rely on the signed comparison below. + __ sub(temp0, temp0, ShifterOperand(2)); + } + + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs. __ eor(temp1, temp2, ShifterOperand(IP)); __ rbit(temp1, temp1); __ clz(temp1, temp1); - // temp0 = number of 16-bit characters remaining to compare. 
- // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and - // after the end of the shorter string data). - - // (temp1 >> 4) = character where difference occurs between the last two words compared, on the - // interval [0,1] (0 for low half-word different, 1 for high half-word different). - - // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just - // return length diff (out). - __ cmp(temp0, ShifterOperand(temp1, LSR, 4)); - __ b(&end, LE); + // temp0 = number of characters remaining to compare. + // (Without string compression, it could be < 1 if a difference is found by the second CMP + // in the comparison loop, and after the end of the shorter string data). + + // Without string compression (temp1 >> 4) = character where difference occurs between the last + // two words compared, in the interval [0,1]. + // (0 for low half-word different, 1 for high half-word different). + // With string compression, (temp1 << 3) = byte where the difference occurs, + // in the interval [0,3]. + + // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside + // the remaining string data, so just return length diff (out). + // The comparison is unsigned for string compression, otherwise signed. + __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4)); + __ b(&end, mirror::kUseStringCompression ? LS : LE); // Extract the characters and calculate the difference. + Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string, GE); + __ bic(temp1, temp1, ShifterOperand(0x7)); + __ b(&continue_process); + } + __ Bind(&uncompressed_string); __ bic(temp1, temp1, ShifterOperand(0xf)); + __ Bind(&continue_process); + __ Lsr(temp2, temp2, temp1); __ Lsr(IP, IP, temp1); + Label calculate_difference, uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ cmp(temp4, ShifterOperand(0)); + __ b(&uncompressed_string_extract_chars, GE); + __ ubfx(temp2, temp2, 0, 8); + __ ubfx(IP, IP, 0, 8); + __ b(&calculate_difference); + } + __ Bind(&uncompressed_string_extract_chars); __ movt(temp2, 0); __ movt(IP, 0); + __ Bind(&calculate_difference); __ sub(out, IP, ShifterOperand(temp2)); + __ b(&end); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + Label loop_arg_compressed, loop_this_compressed, find_diff; + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ add(temp2, str, ShifterOperand(value_offset)); + __ add(temp3, arg, ShifterOperand(value_offset)); + __ cmp(temp4, ShifterOperand(0)); + __ b(&loop_arg_compressed, LT); + + __ Bind(&loop_this_compressed); + __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex)); + __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_this_compressed, GT); + __ b(&end); + + // This part is when THIS is not compressed and ARG is. 
+ __ Bind(&loop_arg_compressed); + __ ldrh(IP, Address(temp2, char_size, Address::PostIndex)); + __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex)); + __ cmp(IP, ShifterOperand(temp4)); + __ b(&find_diff, NE); + __ subs(temp0, temp0, ShifterOperand(1)); + __ b(&loop_arg_compressed, GT); + __ b(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ sub(out, IP, ShifterOperand(temp4)); + } __ Bind(&end); @@ -1180,7 +1282,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Register temp1 = locations->GetTemp(1).AsRegister<Register>(); Register temp2 = locations->GetTemp(2).AsRegister<Register>(); - Label loop; + Label loop, preloop; Label end; Label return_true; Label return_false; @@ -1214,11 +1316,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { __ ldr(temp, Address(str, count_offset)); __ ldr(temp1, Address(arg, count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. __ cmp(temp, ShifterOperand(temp1)); __ b(&return_false, NE); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ bic(temp, temp, ShifterOperand(0x80000000)); + } __ cbz(temp, &return_true); - // Reference equality check, return true if same reference. __ cmp(str, ShifterOperand(arg)); __ b(&return_true, EQ); @@ -1227,10 +1333,19 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); - __ LoadImmediate(temp1, value_offset); - + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ cmp(temp1, ShifterOperand(0)); + __ b(&preloop, GT); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2. + __ add(temp, temp, ShifterOperand(1)); + __ Lsr(temp, temp, 1); + __ Bind(&preloop); + } // Loop to compare strings 2 characters at a time starting at the front of the string. // Ok to do this because strings with an odd length are zero-padded. + __ LoadImmediate(temp1, value_offset); __ Bind(&loop); __ ldr(out, Address(str, temp1)); __ ldr(temp2, Address(arg, temp1)); @@ -2330,22 +2445,31 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register src_ptr = locations->GetTemp(1).AsRegister<Register>(); Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); - // src range to copy. - __ add(src_ptr, srcObj, ShifterOperand(value_offset)); - __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); - + Label done, compressed_string_loop; // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); - - // Do the copy. - Label loop, remainder, done; - // Early out for valid zero-length retrievals. __ b(&done, EQ); + // src range to copy. + __ add(src_ptr, srcObj, ShifterOperand(value_offset)); + Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. 
+ __ ldr(IP, Address(srcObj, count_offset)); + __ cmp(IP, ShifterOperand(0)); + __ b(&compressed_string_preloop, LT); + } + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); + + // Do the copy. + Label loop, remainder; + // Save repairing the value of num_chr on the < 4 character path. __ subs(IP, num_chr, ShifterOperand(4)); __ b(&remainder, LT); @@ -2374,6 +2498,20 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ subs(num_chr, num_chr, ShifterOperand(1)); __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); __ b(&remainder, GT); + __ b(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Copy loop for a compressed src, copying one character at a time (zero-extending 8-bit to 16-bit). + __ Bind(&compressed_string_preloop); + __ add(src_ptr, src_ptr, ShifterOperand(srcBegin)); + __ Bind(&compressed_string_loop); + __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex)); + __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); + __ subs(num_chr, num_chr, ShifterOperand(1)); + __ b(&compressed_string_loop, GT); + } __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index c671700615..7f20ea4b1f 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -37,7 +37,7 @@ class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -64,7 +64,7 @@ class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ce58657bcd..e2c1802fdc 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1223,6 +1223,11 @@ void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for the String compression feature.
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } @@ -1239,10 +1244,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { Register temp0 = WRegisterFrom(locations->GetTemp(0)); Register temp1 = WRegisterFrom(locations->GetTemp(1)); Register temp2 = WRegisterFrom(locations->GetTemp(2)); + Register temp3, temp5; + if (mirror::kUseStringCompression) { + temp3 = WRegisterFrom(locations->GetTemp(3)); + temp5 = WRegisterFrom(locations->GetTemp(4)); + } vixl::aarch64::Label loop; vixl::aarch64::Label find_char_diff; vixl::aarch64::Label end; + vixl::aarch64::Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1263,9 +1274,18 @@ // Reference equality check, return 0 if same reference. __ Subs(out, str, arg); __ B(&end, eq); - // Load lengths of this and argument strings. - __ Ldr(temp0, HeapOperand(str, count_offset)); - __ Ldr(temp1, HeapOperand(arg, count_offset)); + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ Ldr(temp3, HeapOperand(str, count_offset)); + __ Ldr(temp5, HeapOperand(arg, count_offset)); + // Clear the compression flag from the lengths. + __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000))); + __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000))); + } else { + // Load lengths of this and argument strings. + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); + } // Return zero if both strings are empty. __ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1276,8 +1296,22 @@ // Shorter string is empty? __ Cbz(temp2, &end); + if (mirror::kUseStringCompression) { + // Check that both strings use the same compression style before using this comparison loop. + __ Eor(temp3.W(), temp3, Operand(temp5)); + __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression); + } // Store offset of string value in preparation for comparison loop. __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // In theory this could exceed INT32_MAX, so treat temp2 as unsigned. + vixl::aarch64::Label let_it_signed; + __ Cmp(temp5, Operand(0)); + __ B(lt, &let_it_signed); + __ Add(temp2, temp2, Operand(temp2)); + __ Bind(&let_it_signed); + } UseScratchRegisterScope scratch_scope(masm); Register temp4 = scratch_scope.AcquireX(); @@ -1299,29 +1333,90 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); - __ Subs(temp2, temp2, 4); - __ B(gt, &loop); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4); + __ B(hi, &loop); __ B(&end); // Promote temp1 to an X reg, ready for EOR. temp1 = temp1.X(); - // Find the single 16-bit character difference. + // Find the single character difference. __ Bind(&find_char_diff); // Get the bit position of the first character that differs.
__ Eor(temp1, temp0, temp4); __ Rbit(temp1, temp1); __ Clz(temp1, temp1); - // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then + // If the number of chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). - __ Cmp(temp2, Operand(temp1.W(), LSR, 4)); - __ B(le, &end); + // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the + // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or + // unsigned when string compression is disabled. + // When it's enabled, the comparison must be unsigned. + __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); + __ B(ls, &end); // Extract the characters and calculate the difference. + vixl::aarch64::Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string); + __ Bic(temp1, temp1, 0x7); + __ B(&continue_process); + } + __ Bind(&uncompressed_string); __ Bic(temp1, temp1, 0xf); + __ Bind(&continue_process); + __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); + vixl::aarch64::Label uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars); + __ And(temp4, temp4, 0xff); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB)); + __ B(&end); + } + __ Bind(&uncompressed_string_extract_chars); __ And(temp4, temp4, 0xffff); __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); + __ B(&end); + + if (mirror::kUseStringCompression) { + vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff; + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + temp0 = temp0.W(); + temp1 = temp1.W(); + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ Add(temp0, str, Operand(value_offset)); + __ Add(temp1, arg, Operand(value_offset)); + __ Cmp(temp5, Operand(0)); + __ B(lt, &loop_arg_compressed); + + __ Bind(&loop_this_compressed); + __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex)); + __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_this_compressed); + __ B(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex)); + __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex)); + __ Cmp(temp3, Operand(temp5)); + __ B(ne, &find_diff); + __ Subs(temp2, temp2, 1); + __ B(gt, &loop_arg_compressed); + __ B(&end); + + // Calculate the difference.
+ __ Bind(&find_diff); + __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH)); + } __ Bind(&end); @@ -1356,7 +1451,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { Register temp1 = WRegisterFrom(locations->GetTemp(0)); Register temp2 = WRegisterFrom(locations->GetTemp(1)); - vixl::aarch64::Label loop; + vixl::aarch64::Label loop, preloop; vixl::aarch64::Label end; vixl::aarch64::Label return_true; vixl::aarch64::Label return_false; @@ -1394,22 +1489,37 @@ __ Ldr(temp, MemOperand(str.X(), count_offset)); __ Ldr(temp1, MemOperand(arg.X(), count_offset)); // Check if lengths are equal, return false if they're not. + // Also compares the compression style; if it differs, return false. __ Cmp(temp, temp1); __ B(&return_false, ne); - // Store offset of string value in preparation for comparison loop - __ Mov(temp1, value_offset); // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because a length of 0 is treated as compressed. + __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000))); + } __ Cbz(temp, &return_true); // Assertions that must hold in order to compare strings 4 characters at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + if (mirror::kUseStringCompression) { + // If not compressed, go directly to the fast compare. Else preprocess the length. + __ Cmp(temp1, Operand(0)); + __ B(&preloop, gt); + // Mask out the compression flag and adjust the length of the compressed (8-bit) string + // as if it were 16-bit data: new_length = (length + 1) / 2. + __ Add(temp, temp, 1); + __ Lsr(temp, temp, 1); + } + temp1 = temp1.X(); temp2 = temp2.X(); - // Loop to compare strings 4 characters at a time starting at the beginning of the string. // Ok to do this because strings are zero-padded to be 8-byte aligned. + // Store offset of string value in preparation for comparison loop + __ Bind(&preloop); + __ Mov(temp1, value_offset); __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); @@ -1773,6 +1883,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + // Need a temporary register for the String compression feature. + if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1800,29 +1914,41 @@ Register src_ptr = XRegisterFrom(locations->GetTemp(0)); Register num_chr = XRegisterFrom(locations->GetTemp(1)); Register tmp1 = XRegisterFrom(locations->GetTemp(2)); + Register tmp3; + if (mirror::kUseStringCompression) { + tmp3 = WRegisterFrom(locations->GetTemp(3)); + } UseScratchRegisterScope temps(masm); Register dst_ptr = temps.AcquireX(); Register tmp2 = temps.AcquireX(); - // src address to copy from. - __ Add(src_ptr, srcObj, Operand(value_offset)); - __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); + vixl::aarch64::Label done; + vixl::aarch64::Label compressed_string_loop; + __ Sub(num_chr, srcEnd, srcBegin); + // Early out for valid zero-length retrievals. + __ Cbz(num_chr, &done); // dst address to start copying to.
__ Add(dst_ptr, dstObj, Operand(data_offset)); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); - __ Sub(num_chr, srcEnd, srcBegin); + // src address to copy from. + __ Add(src_ptr, srcObj, Operand(value_offset)); + vixl::aarch64::Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // String's length. + __ Ldr(tmp3, MemOperand(srcObj, count_offset)); + __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop); + } + __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); // Do the copy. vixl::aarch64::Label loop; - vixl::aarch64::Label done; vixl::aarch64::Label remainder; - // Early out for valid zero-length retrievals. - __ Cbz(num_chr, &done); - // Save repairing the value of num_chr on the < 8 character path. __ Subs(tmp1, num_chr, 8); __ B(lt, &remainder); @@ -1848,6 +1974,20 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Subs(num_chr, num_chr, 1); __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); __ B(gt, &remainder); + __ B(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + __ Bind(&compressed_string_preloop); + __ Add(src_ptr, src_ptr, Operand(srcBegin)); + // Copy loop for a compressed src, copying one character at a time (zero-extending 8-bit to 16-bit). + __ Bind(&compressed_string_loop); + __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); + __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); + __ Subs(num_chr, num_chr, Operand(1)); + __ B(gt, &compressed_string_loop); + } __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 525153621b..28e41cb086 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -42,7 +42,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -66,7 +66,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h deleted file mode 100644 index db60238fb4..0000000000 --- a/compiler/optimizing/intrinsics_list.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ -#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ - -// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected -// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an -// environment, may have side effects, or may throw exceptions. - -#define INTRINSICS_LIST(V) \ - V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatFloatToIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIsInfinite, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIsNaN, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(IntegerSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(LongSignum, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsLong, kStatic, 
kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ - V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(MemoryPokeShortNative, kStatic, 
kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ - V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ - V(StringIsEmpty, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \ - V(StringLength, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow) \ - V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndAddInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndAddLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeGetAndSetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeLoadFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeStoreFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(UnsafeFullFence, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ - V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) - -#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ -#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. 
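The string-compression hunks above and below all rely on the same encoding of String.count: the sign bit is the compression flag (set means compressed, 8-bit characters) and the low 31 bits hold the length. The following is a minimal C++ sketch of that encoding, not part of the patch; the helpers and their names (kCompressionFlagMask, IsCompressed, GetLength, AdjustedCharLength) are illustrative only.

#include <cstdint>

// Sketch of the count-field encoding assumed by this change: the sign bit of
// String.count is the compression flag; set means compressed 8-bit characters.
constexpr uint32_t kCompressionFlagMask = 0x80000000u;

inline bool IsCompressed(int32_t count) {
  return count < 0;  // Flag bit set; matches the GE (uncompressed) / LT branches.
}

inline int32_t GetLength(int32_t count) {
  // Equivalent of the BIC / andl with 0x80000000 in the generated code.
  return static_cast<int32_t>(static_cast<uint32_t>(count) & ~kCompressionFlagMask);
}

// The String.equals preprocessing: treat compressed 8-bit data as 16-bit units,
// new_length = (length + 1) / 2. Adding 0x80000001 merges clearing the flag with
// the +1 for rounding, as the x86 hunk below does.
inline int32_t AdjustedCharLength(int32_t count) {
  return IsCompressed(count)
      ? static_cast<int32_t>((static_cast<uint32_t>(count) + 0x80000001u) >> 1)
      : count;
}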
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 575a7d0a23..e134cb882e 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 4137fbd1b6..5b95c26a21 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index e61aba05b4..f41e4d95b5 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1401,23 +1401,39 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { __ cmpl(str, arg); __ j(kEqual, &return_true); - // Load length of receiver string. + // Load length and compression flag of receiver string. __ movl(ecx, Address(str, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if lengths and compression flags are equal, return false if they're not. + // Two identical strings will always have the same compression style, since + // the compression style is decided at allocation time. __ cmpl(ecx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); - // Return true if both strings are empty. - __ jecxz(&return_true); + if (mirror::kUseStringCompression) { + NearLabel string_uncompressed; + // Split into the cases where both strings are compressed or both are uncompressed; + // different compression styles were rejected above. + __ cmpl(ecx, Immediate(0)); + __ j(kGreaterEqual, &string_uncompressed); + // Divide string length by 2, rounding up, and continue as if uncompressed. + // Merge clearing the compression flag (+0x80000000) with +1 for rounding.
+ __ addl(ecx, Immediate(0x80000001)); + __ shrl(ecx, Immediate(1)); + __ Bind(&string_uncompressed); + } + // Return true if strings are empty. + __ jecxz(&return_true); // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. __ leal(esi, Address(str, value_offset)); __ leal(edi, Address(arg, value_offset)); - // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths. + // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not + // divisible by 2. __ addl(ecx, Immediate(1)); __ shrl(ecx, Immediate(1)); - // Assertions that must hold in order to compare strings 2 characters at a time. + // Assertions that must hold in order to compare strings 2 characters (uncompressed) + // or 4 characters (compressed) at a time. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); @@ -1461,6 +1477,10 @@ static void CreateStringIndexOfLocations(HInvoke* invoke, locations->AddTemp(Location::RegisterLocation(ECX)); // Need another temporary to be able to compute the result. locations->AddTemp(Location::RequiresRegister()); + if (mirror::kUseStringCompression) { + // Need another temporary to be able to save unflagged string length. + locations->AddTemp(Location::RequiresRegister()); + } } static void GenerateStringIndexOf(HInvoke* invoke, @@ -1478,6 +1498,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, Register counter = locations->GetTemp(0).AsRegister<Register>(); Register string_length = locations->GetTemp(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); + // Only used when string compression feature is on. + Register string_length_flagged; // Check our assumptions for registers. DCHECK_EQ(string_obj, EDI); @@ -1515,6 +1537,12 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Load string length, i.e., the count field of the string. __ movl(string_length, Address(string_obj, count_offset)); + if (mirror::kUseStringCompression) { + string_length_flagged = locations->GetTemp(2).AsRegister<Register>(); + __ movl(string_length_flagged, string_length); + // Mask out first bit used as compression flag. + __ andl(string_length, Immediate(INT32_MAX)); + } // Do a zero-length check. // TODO: Support jecxz. @@ -1540,20 +1568,50 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ cmpl(start_index, Immediate(0)); __ cmovl(kGreater, counter, start_index); - // Move to the start of the string: string_obj + value_offset + 2 * start_index. - __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); - - // Now update ecx (the repne scasw work counter). We have string.length - start_index left to - // compare. + if (mirror::kUseStringCompression) { + NearLabel modify_counter, offset_uncompressed_label; + __ cmpl(string_length_flagged, Immediate(0)); + __ j(kGreaterEqual, &offset_uncompressed_label); + // Move to the start of the string: string_obj + value_offset + start_index. + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); + __ jmp(&modify_counter); + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ Bind(&offset_uncompressed_label); + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx (the repne scasw work counter). We have string.length - start_index left to + // compare. 
+ __ Bind(&modify_counter); + } else { + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + } __ negl(counter); __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); } - // Everything is set up for repne scasw: - // * Comparison address in EDI. - // * Counter in ECX. - __ repne_scasw(); - + if (mirror::kUseStringCompression) { + NearLabel uncompressed_string_comparison; + NearLabel comparison_done; + __ cmpl(string_length_flagged, Immediate(0)); + __ j(kGreater, &uncompressed_string_comparison); + + // Check if EAX (search_value) is ASCII. + __ cmpl(search_value, Immediate(127)); + __ j(kGreater, ¬_found_label); + // Comparing byte-per-byte. + __ repne_scasb(); + __ jmp(&comparison_done); + + // Everything is set up for repne scasw: + // * Comparison address in EDI. + // * Counter in ECX. + __ Bind(&uncompressed_string_comparison); + __ repne_scasw(); + __ Bind(&comparison_done); + } else { + __ repne_scasw(); + } // Did we find a match? __ j(kNotEqual, ¬_found_label); @@ -1706,38 +1764,64 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); - // Compute the address of the destination buffer. - __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); - - // Compute the address of the source string. - if (srcBegin.IsConstant()) { - // Compute the address of the source string by adding the number of chars from - // the source beginning to the value offset of a string. - __ leal(ESI, Address(obj, srcBegin_value * char_size + value_offset)); - } else { - __ leal(ESI, Address(obj, srcBegin.AsRegister<Register>(), - ScaleFactor::TIMES_2, value_offset)); - } - // Compute the number of chars (words) to move. - // Now is the time to save ECX, since we don't know if it will be used later. + // Save ECX, since we don't know if it will be used later. __ pushl(ECX); int stack_adjust = kX86WordSize; __ cfi().AdjustCFAOffset(stack_adjust); DCHECK_EQ(srcEnd, ECX); if (srcBegin.IsConstant()) { - if (srcBegin_value != 0) { - __ subl(ECX, Immediate(srcBegin_value)); - } + __ subl(ECX, Immediate(srcBegin_value)); } else { DCHECK(srcBegin.IsRegister()); __ subl(ECX, srcBegin.AsRegister<Register>()); } - // Do the move. + NearLabel done; + if (mirror::kUseStringCompression) { + // Location of count in string + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + __ pushl(EAX); + __ cfi().AdjustCFAOffset(stack_adjust); + + NearLabel copy_loop, copy_uncompressed; + __ cmpl(Address(obj, count_offset), Immediate(0)); + __ j(kGreaterEqual, ©_uncompressed); + // Compute the address of the source string by adding the number of chars from + // the source beginning to the value offset of a string. + __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); + + // Start the loop to copy String's value to Array of Char. + __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + __ Bind(©_loop); + __ jecxz(&done); + // Use EAX temporary (convert byte from ESI to word). + // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0. + __ movzxb(EAX, Address(ESI, 0)); + __ movw(Address(EDI, 0), EAX); + __ leal(EDI, Address(EDI, char_size)); + __ leal(ESI, Address(ESI, c_char_size)); + // TODO: Add support for LOOP to X86Assembler. 
+ __ subl(ECX, Immediate(1)); + __ jmp(&copy_loop); + __ Bind(&copy_uncompressed); + } + + // Do the copy for uncompressed string. + // Compute the address of the destination buffer. + __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); __ rep_movsw(); - // And restore ECX. + __ Bind(&done); + if (mirror::kUseStringCompression) { + // Restore EAX. + __ popl(EAX); + __ cfi().AdjustCFAOffset(-stack_adjust); + } + // Restore ECX. __ popl(ECX); __ cfi().AdjustCFAOffset(-stack_adjust); } diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 08bd197400..3743cb1371 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 0f31fabbfb..4b0afca122 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1568,14 +1568,27 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { __ cmpl(str, arg); __ j(kEqual, &return_true); - // Load length of receiver string. + // Load length and compression flag of receiver string. __ movl(rcx, Address(str, count_offset)); - // Check if lengths are equal, return false if they're not. + // Check if lengths and compression flags are equal, return false if they're not. + // Two identical strings will always have the same compression style, since + // the compression style is decided at allocation time. __ cmpl(rcx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); + + if (mirror::kUseStringCompression) { + NearLabel string_uncompressed; + // Both strings are compressed. + __ cmpl(rcx, Immediate(0)); + __ j(kGreaterEqual, &string_uncompressed); + // Divide string length by 2, rounding up, and continue as if uncompressed. + // Merge clearing the compression flag with +1 for rounding. + __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001))); + __ shrl(rcx, Immediate(1)); + __ Bind(&string_uncompressed); + } // Return true if both strings are empty. __ jrcxz(&return_true); - // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction. __ leal(rsi, Address(str, value_offset)); __ leal(rdi, Address(arg, value_offset)); @@ -1584,7 +1597,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { __ addl(rcx, Immediate(3)); __ shrl(rcx, Immediate(2)); - // Assertions that must hold in order to compare strings 4 characters at a time.
+ // Assertions that must hold in order to compare strings 4 characters (uncompressed) + // or 8 characters (compressed) at a time. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded"); @@ -1674,7 +1688,8 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ j(kAbove, slow_path->GetEntryLabel()); } - // From here down, we know that we are looking for a char that fits in 16 bits. + // From here down, we know that we are looking for a char that fits in + // 16 bits (uncompressed) or 8 bits (compressed). // Location of reference to data array within the String object. int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Location of count within the String object. @@ -1682,6 +1697,12 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Load string length, i.e., the count field of the string. __ movl(string_length, Address(string_obj, count_offset)); + if (mirror::kUseStringCompression) { + // Use TMP to keep string_length_flagged. + __ movl(CpuRegister(TMP), string_length); + // Mask out first bit used as compression flag. + __ andl(string_length, Immediate(INT32_MAX)); + } // Do a length check. // TODO: Support jecxz. @@ -1692,7 +1713,6 @@ static void GenerateStringIndexOf(HInvoke* invoke, if (start_at_zero) { // Number of chars to scan is the same as the string length. __ movl(counter, string_length); - // Move to the start of the string. __ addq(string_obj, Immediate(value_offset)); } else { @@ -1707,19 +1727,44 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ cmpl(start_index, Immediate(0)); __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. - // Move to the start of the string: string_obj + value_offset + 2 * start_index. - __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); - + if (mirror::kUseStringCompression) { + NearLabel modify_counter, offset_uncompressed_label; + __ cmpl(CpuRegister(TMP), Immediate(0)); + __ j(kGreaterEqual, &offset_uncompressed_label); + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset)); + __ jmp(&modify_counter); + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ Bind(&offset_uncompressed_label); + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + __ Bind(&modify_counter); + } else { + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + } // Now update ecx, the work counter: it's gonna be string.length - start_index. __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit. __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); } - // Everything is set up for repne scasw: - // * Comparison address in RDI. - // * Counter in ECX. - __ repne_scasw(); - + if (mirror::kUseStringCompression) { + NearLabel uncompressed_string_comparison; + NearLabel comparison_done; + __ cmpl(CpuRegister(TMP), Immediate(0)); + __ j(kGreater, &uncompressed_string_comparison); + // Check if RAX (search_value) is ASCII. + __ cmpl(search_value, Immediate(127)); + __ j(kGreater, ¬_found_label); + // Comparing byte-per-byte. + __ repne_scasb(); + __ jmp(&comparison_done); + // Everything is set up for repne scasw: + // * Comparison address in RDI. + // * Counter in ECX. + __ Bind(&uncompressed_string_comparison); + __ repne_scasw(); + __ Bind(&comparison_done); + } else { + __ repne_scasw(); + } // Did we find a match? 
__ j(kNotEqual, ¬_found_label); @@ -1871,32 +1916,54 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); DCHECK_EQ(char_size, 2u); - // Compute the address of the destination buffer. - __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); - - // Compute the address of the source string. - if (srcBegin.IsConstant()) { - // Compute the address of the source string by adding the number of chars from - // the source beginning to the value offset of a string. - __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset)); - } else { - __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(), - ScaleFactor::TIMES_2, value_offset)); - } - + NearLabel done; // Compute the number of chars (words) to move. __ movl(CpuRegister(RCX), srcEnd); if (srcBegin.IsConstant()) { - if (srcBegin_value != 0) { - __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); - } + __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); } else { DCHECK(srcBegin.IsRegister()); __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>()); } + if (mirror::kUseStringCompression) { + NearLabel copy_uncompressed, copy_loop; + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Location of count in string. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ cmpl(Address(obj, count_offset), Immediate(0)); + __ j(kGreaterEqual, ©_uncompressed); + // Compute the address of the source string by adding the number of chars from + // the source beginning to the value offset of a string. + __ leaq(CpuRegister(RSI), + CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset)); + // Start the loop to copy String's value to Array of Char. + __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); + + __ Bind(©_loop); + __ jrcxz(&done); + // Use TMP as temporary (convert byte from RSI to word). + // TODO: Selecting RAX as the temporary and using LODSB/STOSW. + __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0)); + __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP)); + __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size)); + __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size)); + // TODO: Add support for LOOP to X86_64Assembler. + __ subl(CpuRegister(RCX), Immediate(1)); + __ jmp(©_loop); + + __ Bind(©_uncompressed); + } + + __ leaq(CpuRegister(RSI), + CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset)); + // Compute the address of the destination buffer. + __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); // Do the move. __ rep_movsw(); + + __ Bind(&done); } static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 155ff6548b..97404aa568 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 9cfa89b7d0..ef9bf23a17 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -25,7 +25,7 @@ #include "base/stl_util.h" #include "intrinsics.h" #include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4dc4c20003..397abded27 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -32,6 +32,7 @@ #include "handle.h" #include "handle_scope.h" #include "invoke_type.h" +#include "intrinsics_enum.h" #include "locations.h" #include "method_reference.h" #include "mirror/class.h" @@ -3690,17 +3691,6 @@ class HNewInstance FINAL : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; -enum class Intrinsics { -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ - k ## Name, -#include "intrinsics_list.h" - kNone, - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS -}; -std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); - enum IntrinsicNeedsEnvironmentOrCache { kNoEnvironmentOrCache, // Intrinsic does not require an environment or dex cache. kNeedsEnvironmentOrCache // Intrinsic requires an environment or requires a dex cache. 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c5d761183a..d3a55dd365 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -56,7 +56,6 @@ #include "dead_code_elimination.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" -#include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "dex/verified_method.h" #include "driver/compiler_driver-inl.h" @@ -479,7 +478,7 @@ static HOptimization* BuildOptimization( } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { - return new (arena) IntrinsicsRecognizer(graph, driver, stats); + return new (arena) IntrinsicsRecognizer(graph, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { CHECK(most_recent_side_effects != nullptr); return new (arena) LICM(graph, *most_recent_side_effects, stats); @@ -743,7 +742,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier$after_bce"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier$before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats); + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); HOptimization* optimizations1[] = { intrinsics, @@ -899,7 +898,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); Handle<mirror::ClassLoader> loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(class_loader))); + soa.Decode<mirror::ClassLoader>(class_loader))); method = compiler_driver->ResolveMethod( soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); } diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index dd5cb1c9bb..2a23c92f1f 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -22,7 +22,7 @@ #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" -#include "handle_scope-inl.h" +#include "handle_scope.h" #include "scoped_thread_state_change.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4289cf7e0f..5a47df1a0d 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -20,7 +20,7 @@ #include "class_linker-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art { diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index abec55f25c..a4a3e0695d 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -31,7 +31,7 @@ #include "mirror/string.h" #include "nodes.h" #include "runtime.h" -#include "scoped_thread_state_change.h" +#include "scoped_thread_state_change-inl.h" namespace art {
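All four backends implement the same two compressed-string fast paths: a widening copy for getCharsNoCheck (ldrb/strh, Ldrb/Strh, movzxb/movw) and an ASCII early-out for indexOf, since a compressed string can only hold values up to 127 and a larger search value can never match. The following is a minimal C++ sketch of both under the encoding described earlier, not part of the patch; the function names and signatures are illustrative only.

#include <cstddef>
#include <cstdint>

// Widening copy, one 8-bit character to one 16-bit character per iteration,
// mirroring the compressed_string_loop bodies added in this change.
inline void CopyCompressedChars(const uint8_t* src, uint16_t* dst, size_t num_chr) {
  for (size_t i = 0; i < num_chr; ++i) {
    dst[i] = src[i];  // Zero-extend, as ldrb/strh and movzxb/movw do.
  }
}

// indexOf on compressed data, mirroring the cmpl(search_value, 127) early-out
// before the byte-wise repne_scasb scan.
inline ptrdiff_t IndexOfCompressed(const uint8_t* data, size_t len, uint16_t ch) {
  if (ch > 127) {
    return -1;  // Not representable in a compressed string; can never match.
  }
  for (size_t i = 0; i < len; ++i) {
    if (data[i] == ch) {
      return static_cast<ptrdiff_t>(i);
    }
  }
  return -1;
}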