Diffstat (limited to 'compiler')
23 files changed, 605 insertions, 232 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 02c176c8fa..2666835b12 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -75,6 +75,7 @@ LIBART_COMPILER_SRC_FILES := \
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_phi_elimination.cc \
 	optimizing/stack_map_stream.cc \
+	optimizing/x86_memory_gen.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 672018b355..41b19601b9 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -444,7 +444,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(133 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(164 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4520f9b3e3..d40e2b9ad1 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -314,7 +314,8 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
-  LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall);
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnMainOnly);
 
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
@@ -378,7 +379,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary(
   ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
 
   LocationSummary* locations =
-      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+      new (allocator) LocationSummary(field_access, LocationSummary::kCallOnMainOnly);
 
   locations->AddTemp(calling_convention.GetFieldIndexLocation());
 
@@ -499,7 +500,7 @@ void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
                                                    bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
           cls->CanCallRuntime())
             ? LocationSummary::kCallOnSlowPath
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9364be35ff..b8540baca2 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -350,6 +350,16 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
   // accessing the String's `value` field in String intrinsics.
   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
 
+  // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
+  template <size_t pointer_size>
+  static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
+    DCHECK_LT(reg, 32u);
+    // The ReadBarrierMarkRegX entry points are ordered by increasing
+    // register number in Thread::tlsPtr_.quick_entrypoints.
+    return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+        + pointer_size * reg;
+  }
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
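Note: the helper added above reduces the per-register entry point lookup to plain offset arithmetic, since the 32 ReadBarrierMarkRegX entry points are laid out back to back: entry X sits X pointer-sized slots past pReadBarrierMarkReg00. A minimal standalone sketch of that arithmetic, with made-up constants rather than the real ART entrypoint offsets:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-ins for QUICK_ENTRYPOINT_OFFSET(...).Int32Value().
    constexpr int32_t kReadBarrierMarkReg00Offset = 0x200;  // made-up base offset
    constexpr size_t kPointerSize = 8;                      // 64-bit target assumed

    int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
      assert(reg < 32u);  // one entry point per core register
      return static_cast<int32_t>(kReadBarrierMarkReg00Offset + kPointerSize * reg);
    }

    int main() {
      // Entry point for register 5 sits five pointer-sized slots past Reg00.
      assert(GetReadBarrierMarkEntryPointsOffset(5) == 0x200 + 5 * 8);
      return 0;
    }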
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 690ecc3429..1aa7b5404c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -412,8 +412,8 @@ class ArraySetSlowPathARM : public SlowPathCode {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -421,9 +421,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -437,24 +437,44 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    // Save live registers before the runtime call, and in particular
+    // R0 (if it is live), as it is clobbered by functions
+    // art_quick_read_barrier_mark_regX.
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+    DCHECK_NE(reg, SP);
+    DCHECK_NE(reg, LR);
+    DCHECK_NE(reg, PC);
+    DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   R0 <- obj
+    //   R0 <- ReadBarrierMark(R0)
+    //   obj <- R0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmWordSize>(reg);
+    // TODO: Do not emit a stack map for this runtime call.
+    arm_codegen->InvokeRuntime(entry_point_offset,
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
 
     RestoreLiveRegisters(codegen, locations);
     __ b(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
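Note: the "compact" slow path works because there is now one marking entrypoint per register, so neither the input move into the convention register nor the output move back is needed. An illustrative sketch of the idea (toy code, not ART): conceptually the thread keeps a table of marking routines indexed by the register that holds the reference.

    #include <array>
    #include <cstdint>

    using Ref = uintptr_t;
    using MarkFn = Ref (*)(Ref);

    // Stand-ins for art_quick_read_barrier_mark_reg00/01/...: each routine
    // takes and returns the reference in "its" register.
    Ref MarkReg00(Ref ref) { return ref; }  // the real code would mark the object
    Ref MarkReg01(Ref ref) { return ref; }

    // Per-thread entrypoint table; GetReadBarrierMarkEntryPointsOffset above
    // computes the byte offset of entrypoints[reg] from the thread base.
    std::array<MarkFn, 2> entrypoints = {MarkReg00, MarkReg01};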
@@ -2014,7 +2034,7 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
       (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
         && result_type == Primitive::kPrimLong)
        || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -2833,13 +2853,13 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   if (div->GetResultType() == Primitive::kPrimLong) {
     // pLdiv runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
   } else if (div->GetResultType() == Primitive::kPrimInt &&
              !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
     // pIdivmod runtime call.
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2958,7 +2978,7 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
 
   // Most remainders are implemented in the runtime.
-  LocationSummary::CallKind call_kind = LocationSummary::kCall;
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
   if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
     // sdiv will be replaced by other instruction sequence.
     call_kind = LocationSummary::kNoCall;
@@ -3495,7 +3515,7 @@ void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) {
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
   } else {
@@ -3528,7 +3548,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(R0));
@@ -5449,7 +5469,7 @@ void InstructionCodeGeneratorARM::VisitClearException(HClearException* clear ATT
 
 void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -5850,7 +5870,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6174,7 +6194,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
 
     // Slow path used to mark the GC root `root`.
     SlowPathCode* slow_path =
-        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
     codegen_->AddSlowPath(slow_path);
 
     // IP = Thread::Current()->GetIsGcMarking()
@@ -6277,21 +6297,12 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
-  __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including the rb_state,
+  // which shall prevent load-load reordering without using
   // a memory barrier (which would be more expensive).
-  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // IP, which depends on temp_reg.
-  __ add(obj, obj, ShifterOperand(IP));
+  // obj is unchanged by this operation, but its value now depends on temp_reg.
+  __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
@@ -6323,13 +6334,19 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with LSRS
+  // which can be a 16-bit instruction unlike the TST immediate.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
+  __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
 
   __ Bind(slow_path->GetExitLabel());
 }
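Note: since white == 0, gray == 1 and black == 2, the gray state is the only one with its low bit set, so a single-bit test replaces the full compare, and LSRS leaves exactly that bit in the carry flag. A standalone sketch of the bit arithmetic, assuming a made-up lock word layout (shift value is illustrative, not the real LockWord constant):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;  // assumed layout
    constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;

    bool IsGray(uint32_t lock_word) {
      // Mirrors LSRS #(shift + 1): the last bit shifted out, i.e. the low
      // rb_state bit, is what the generated code reads from the carry flag.
      return (lock_word >> kReadBarrierStateShift) & 1u;
    }

    int main() {
      assert(!IsGray(kWhite << kReadBarrierStateShift));
      assert(IsGray(kGray << kReadBarrierStateShift));
      assert(!IsGray(kBlack << kReadBarrierStateShift));
      return 0;
    }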
@@ -6953,21 +6970,25 @@ void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
 
 void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
-    __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(),
-                      locations->InAt(0).AsRegister<Register>(),
-                      mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
         instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
 
 #undef __
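Note: the ClassTableGet restructuring (here and in the other backends below) reflects that the embedded vtable lives inside the Class object while the interface method table now hangs off it behind a pointer, so the IMT case needs two dependent loads. A conceptual sketch with toy types (not the art::mirror classes):

    #include <cstddef>

    struct ArtMethod {};

    struct ImTable {
      static constexpr size_t kSize = 64;  // assumed table size
      ArtMethod* entries[kSize];
    };

    struct Class {
      ImTable* imt;                    // reached via one indirection
      ArtMethod* embedded_vtable[16];  // stand-in for the embedded table
    };

    ArtMethod* GetVTableEntry(Class* klass, size_t index) {
      return klass->embedded_vtable[index];                // one load
    }

    ArtMethod* GetImtEntry(Class* klass, size_t index) {
      return klass->imt->entries[index % ImTable::kSize];  // two dependent loads
    }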
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c8d33d5743..d9d675e0a7 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -580,8 +580,8 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCodeARM64(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
+      : SlowPathCodeARM64(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -589,9 +589,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -605,24 +604,44 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    // Save live registers before the runtime call, and in particular
+    // W0 (if it is live), as it is clobbered by functions
+    // art_quick_read_barrier_mark_regX.
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+    DCHECK_NE(obj_.reg(), LR);
+    DCHECK_NE(obj_.reg(), WSP);
+    DCHECK_NE(obj_.reg(), WZR);
+    DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in W0):
+    //
+    //   W0 <- obj
+    //   W0 <- ReadBarrierMark(W0)
+    //   obj <- W0
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64WordSize>(obj_.reg());
+    // TODO: Do not emit a stack map for this runtime call.
+    arm64_codegen->InvokeRuntime(entry_point_offset,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
 
     RestoreLiveRegisters(codegen, locations);
     __ B(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
@@ -4273,7 +4292,7 @@ void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant AT
 
 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -4371,7 +4390,7 @@ void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
 
 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
   locations->SetOut(LocationFrom(x0));
@@ -4396,7 +4415,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(LocationFrom(kArtMethodRegister));
@@ -4549,7 +4568,8 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED)
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4766,7 +4786,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction
 
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
 }
@@ -5061,7 +5081,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
 
     // Slow path used to mark the GC root `root`.
     SlowPathCodeARM64* slow_path =
-        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
     codegen_->AddSlowPath(slow_path);
 
     MacroAssembler* masm = GetVIXLAssembler();
@@ -5206,23 +5226,12 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
-  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
 
-  // Introduce a dependency on the high bits of rb_state, which shall
-  // be all zeroes, to prevent load-load reordering, and without using
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
-  Register temp2 = temps.AcquireW();
-  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
-  // obj is unchanged by this operation, but its value now depends on
-  // temp2, which depends on temp.
-  __ Add(obj, obj, Operand(temp2));
-  temps.Release(temp2);
+  // obj is unchanged by this operation, but its value now depends on temp.
+  __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
 
   // The actual reference load.
   if (index.IsValid()) {
@@ -5248,7 +5257,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
     uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
     Load(type, ref_reg, HeapOperand(obj, computed_offset));
   } else {
-    temp2 = temps.AcquireW();
+    Register temp2 = temps.AcquireW();
     __ Add(temp2, obj, offset);
     Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
     temps.Release(temp2);
@@ -5269,13 +5278,16 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCodeARM64* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ Cmp(temp, ReadBarrier::gray_ptr_);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
 
   __ Bind(slow_path->GetExitLabel());
 }
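Note: the `Add(obj, obj, temp LSR #32)` form (and its ARM32 twin above) works because the 32-bit lock word sits zero-extended in the register, so shifting it right by 32 always produces 0; the add leaves the pointer unchanged but makes the address data-dependent on the lock word load, which the hardware memory model orders without a barrier. A standalone sketch of the value-level identity (the ordering effect only exists in the emitted assembly; a C++ compiler would be free to fold this away):

    #include <cassert>
    #include <cstdint>

    uint64_t DependentAddress(uint64_t obj, uint32_t lock_word) {
      uint64_t x = lock_word;  // zero-extended, as in a W register read via X
      return obj + (x >> 32);  // always obj + 0, but the result depends on x
    }

    int main() {
      assert(DependentAddress(0x1000, 0xffffffffu) == 0x1000);
      return 0;
    }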
@@ -5350,18 +5362,19 @@ void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
 
 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
     __ Ldr(XRegisterFrom(locations->Out()),
            MemOperand(XRegisterFrom(locations->InAt(0)),
                       mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+    __ Ldr(XRegisterFrom(locations->Out()),
+           MemOperand(XRegisterFrom(locations->Out()), method_offset));
   }
-  __ Ldr(XRegisterFrom(locations->Out()),
-         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
 }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b6dca95354..2b71da0d1c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1855,7 +1855,7 @@ void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -2467,7 +2467,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst
 void LocationsBuilderMIPS::VisitDiv(HDiv* div) {
   Primitive::Type type = div->GetResultType();
   LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
      : LocationSummary::kNoCall;
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3430,7 +3430,7 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -3557,7 +3557,7 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCall : LocationSummary::kNoCall);
+      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -4218,7 +4218,7 @@ void InstructionCodeGeneratorMIPS::VisitLongConstant(HLongConstant* constant ATT
 
 void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4397,7 +4397,7 @@ void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) {
 
 void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
@@ -4423,7 +4423,7 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
 
 void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4593,7 +4593,7 @@ void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED)
 void LocationsBuilderMIPS::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCall;
+      (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -4830,7 +4830,7 @@ void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction)
 
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -4859,7 +4859,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
   if (!isR6 &&
       ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
        (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
-    call_kind = LocationSummary::kCall;
+    call_kind = LocationSummary::kCallOnMainOnly;
   }
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -5380,22 +5380,25 @@ void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) {
 
 void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      method_offset);
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
     __ LoadFromOffset(kLoadWord,
                       locations->Out().AsRegister<Register>(),
                       locations->InAt(0).AsRegister<Register>(),
                       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->Out().AsRegister<Register>(),
+                      method_offset);
   }
-  __ LoadFromOffset(kLoadWord,
-                    locations->Out().AsRegister<Register>(),
-                    locations->InAt(0).AsRegister<Register>(),
-                    method_offset);
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 9f2664c0a5..aa1ba84178 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1436,7 +1436,7 @@ void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
   bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
   if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -3292,7 +3292,7 @@ void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant A
 
 void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -3419,7 +3419,7 @@ void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) {
 
 void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
@@ -3440,7 +3440,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
 
 void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3600,7 +3600,8 @@ void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED
 void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
   LocationSummary::CallKind call_kind =
-      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+                                           : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
   switch (type) {
@@ -3813,7 +3814,7 @@ void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instructio
 
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index be20f1f7cc..1cc6060f68 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -140,12 +140,29 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<Register>(), array_len);
+    }
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
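Note: when the length was never materialized, the slow path has to load it from the array object itself, and it must pick a register that will not be clobbered by the index argument. A toy sketch of that conflict check (registers are just small integers here, not the real Location machinery):

    #include <cassert>

    int PickLengthRegister(int index_reg, int arg1, int arg2) {
      // Prefer argument register 1; fall back to argument register 2 when
      // the index already occupies it.
      return (index_reg == arg1) ? arg2 : arg1;
    }

    int main() {
      assert(PickLengthRegister(/*index_reg=*/3, /*arg1=*/1, /*arg2=*/2) == 1);
      assert(PickLengthRegister(/*index_reg=*/1, /*arg1=*/1, /*arg2=*/2) == 2);
      return 0;
    }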
@@ -430,8 +447,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -439,9 +456,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -455,24 +472,42 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    // Save live registers before the runtime call, and in particular
+    // EAX (if it is live), as it is clobbered by functions
+    // art_quick_read_barrier_mark_regX.
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
-    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+    DCHECK_NE(reg, ESP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in EAX):
+    //
+    //   EAX <- obj
+    //   EAX <- ReadBarrierMark(EAX)
+    //   obj <- EAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86WordSize>(reg);
+    // TODO: Do not emit a stack map for this runtime call.
+    x86_codegen->InvokeRuntime(entry_point_offset,
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
 
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
@@ -2185,7 +2220,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
   LocationSummary::CallKind call_kind =
       ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
        && result_type == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
@@ -3440,7 +3475,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
 
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -3543,7 +3578,7 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
 
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
-      ? LocationSummary::kCall
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
@@ -3985,7 +4020,7 @@ void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -4018,7 +4053,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
 
 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -4073,20 +4108,21 @@ void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
 
 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(), method_offset));
   } else {
-    __ movl(locations->InAt(0).AsRegister<Register>(),
-            Address(locations->InAt(0).AsRegister<Register>(),
-                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
-    // temp = temp->GetImtEntryAt(method_offset);
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
         instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->InAt(0).AsRegister<Register>(),
+                    mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+    // temp = temp->GetImtEntryAt(method_offset);
+    __ movl(locations->Out().AsRegister<Register>(),
+            Address(locations->Out().AsRegister<Register>(), method_offset));
   }
-  __ movl(locations->Out().AsRegister<Register>(),
-          Address(locations->InAt(0).AsRegister<Register>(), method_offset));
 }
 
 void LocationsBuilderX86::VisitNot(HNot* not_) {
@@ -5517,10 +5553,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   Register obj = locations->InAt(0).AsRegister<Register>();
@@ -5535,7 +5577,10 @@ void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  HInstruction* length = instruction->InputAt(1);
+  if (!length->IsEmittedAtUseSite()) {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -5569,12 +5614,28 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
     codegen_->AddSlowPath(slow_path);
     __ j(kAboveEqual, slow_path->GetEntryLabel());
   } else {
-    Register length = length_loc.AsRegister<Register>();
-    if (index_loc.IsConstant()) {
-      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
-      __ cmpl(length, Immediate(value));
+    HInstruction* array_length = instruction->InputAt(1);
+    if (array_length->IsEmittedAtUseSite()) {
+      // Address the length field in the array.
+      DCHECK(array_length->IsArrayLength());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<Register>(), len_offset);
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(array_len, Immediate(value));
+      } else {
+        __ cmpl(array_len, index_loc.AsRegister<Register>());
+      }
+      codegen_->MaybeRecordImplicitNullCheck(array_length);
     } else {
-      __ cmpl(length, index_loc.AsRegister<Register>());
+      Register length = length_loc.AsRegister<Register>();
+      if (index_loc.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
+        __ cmpl(length, Immediate(value));
+      } else {
+        __ cmpl(length, index_loc.AsRegister<Register>());
+      }
     }
     codegen_->AddSlowPath(slow_path);
     __ j(kBelowEqual, slow_path->GetEntryLabel());
@@ -6242,7 +6303,7 @@ void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATT
 
 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6694,7 +6755,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
 }
@@ -6933,7 +6994,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
 
     // Slow path used to mark the GC root `root`.
     SlowPathCode* slow_path =
-        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+        new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
     codegen_->AddSlowPath(slow_path);
 
     __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
@@ -7063,7 +7124,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
 
   // Slow path used to mark the object `ref` when it is gray.
   SlowPathCode* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
   AddSlowPath(slow_path);
 
   // if (rb_state == ReadBarrier::gray_ptr_)
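Note: on x86 the length never needs its own register when it is emitted at the use site, because cmpl accepts a memory operand; the decision itself presumably lives in the new optimizing/x86_memory_gen.cc pass, whose body is not shown in this diff. A sketch of the effect and a toy model of the decision (names below are illustrative, not the real ART API):

    // Length materialized:                Length emitted at use site:
    //   movl ECX, [EAX + len_offset]        cmpl [EAX + len_offset], EBX
    //   cmpl ECX, EBX
    struct Instr {
      bool is_bounds_check = false;
      int block_id = 0;
      Instr* single_user = nullptr;  // null if zero or several users
    };

    // Toy heuristic: a length whose single user is a bounds check in the
    // same block never needs its own register.
    bool EmitLengthAtUseSite(const Instr& length) {
      return length.single_user != nullptr &&
             length.single_user->is_bounds_check &&
             length.single_user->block_id == length.block_id;
    }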
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cac33cddb8..a0158938b5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -194,14 +194,31 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
+
+    // Are we using an array length from memory?
+    HInstruction* array_length = instruction_->InputAt(1);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
+      // Load the array length into our temporary.
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+      // Check for conflicts with index.
+      if (length_loc.Equals(locations->InAt(0))) {
+        // We know we aren't using parameter 2.
+        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      }
+      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
+    }
+
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
-    InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
         locations->InAt(0),
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
         Primitive::kPrimInt,
-        locations->InAt(1),
+        length_loc,
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
     uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
@@ -451,8 +468,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
 // Slow path marking an object during a read barrier.
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
-      : SlowPathCode(instruction), out_(out), obj_(obj) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
+      : SlowPathCode(instruction), obj_(obj) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -460,9 +477,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg_out = out_.AsRegister<Register>();
+    Register reg = obj_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -476,24 +493,42 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
+    // Save live registers before the runtime call, and in particular
+    // RDI and/or RAX (if they are live), as they are clobbered by
+    // functions art_quick_read_barrier_mark_regX.
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
-    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
-                                  instruction_,
-                                  instruction_->GetDexPc(),
-                                  this);
-    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
-    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+    DCHECK_NE(reg, RSP);
+    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // in RDI, output in RAX):
+    //
+    //   RDI <- obj
+    //   RAX <- ReadBarrierMark(RDI)
+    //   obj <- RAX
+    //
+    // we just use rX (the register holding `obj`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64WordSize>(reg);
+    // TODO: Do not emit a stack map for this runtime call.
+    x86_64_codegen->InvokeRuntime(entry_point_offset,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
 
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location out_;
   const Location obj_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
@@ -3913,7 +3948,7 @@ void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   if (instruction->IsStringAlloc()) {
     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
@@ -3946,7 +3981,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction)
 
 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
   InvokeRuntimeCallingConvention calling_convention;
   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   locations->SetOut(Location::RegisterLocation(RAX));
@@ -4006,19 +4041,20 @@ void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
 
 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  uint32_t method_offset = 0;
   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
-    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
   } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
     __ movq(locations->Out().AsRegister<CpuRegister>(),
             Address(locations->InAt(0).AsRegister<CpuRegister>(),
                     mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
   }
-  __ movq(locations->Out().AsRegister<CpuRegister>(),
-          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
 }
 
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
@@ -4987,10 +5023,16 @@ void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  if (!instruction->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
+  if (instruction->IsEmittedAtUseSite()) {
+    return;
+  }
+
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ DCHECK(array_length->IsArrayLength()); + uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); + Location array_loc = array_length->GetLocations()->InAt(0); + Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(array_len, Immediate(value)); + } else { + __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); + } + codegen_->MaybeRecordImplicitNullCheck(array_length); } else { - __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + CpuRegister length = length_loc.AsRegister<CpuRegister>(); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(length, Immediate(value)); + } else { + __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + } } codegen_->AddSlowPath(slow_path); __ j(kBelowEqual, slow_path->GetEntryLabel()); @@ -5654,7 +5714,7 @@ void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -6164,7 +6224,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -6385,7 +6445,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Slow path used to mark the GC root `root`. SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root); + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(), @@ -6516,7 +6576,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Slow path used to mark the object `ref` when it is gray. 
SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref); + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref); AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 9d67373321..e14f603fe1 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -401,6 +401,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitArrayLength(HArrayLength* array_length) OVERRIDE { StartAttributeStream("is_string_length") << std::boolalpha << array_length->IsStringLength() << std::noboolalpha; + if (array_length->IsEmittedAtUseSite()) { + StartAttributeStream("emitted_at_use") << "true"; + } } void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index e0410dcdb2..4ca0600dba 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -920,6 +920,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); + bool integral_type = Primitive::IsIntegralType(instruction->GetType()); if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) { // Replace code looking like // ADD dst, src, 0 @@ -928,7 +929,7 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When // `x` is `-0.0`, the former expression yields `0.0`, while the latter // yields `-0.0`. - if (Primitive::IsIntegralType(instruction->GetType())) { + if (integral_type) { instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); @@ -974,10 +975,31 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { // so no need to return. TryHandleAssociativeAndCommutativeOperation(instruction); - if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) && + if ((left->IsSub() || right->IsSub()) && TrySubtractionChainSimplification(instruction)) { return; } + + if (integral_type) { + // Replace code patterns looking like + // SUB dst1, x, y SUB dst1, x, y + // ADD dst2, dst1, y ADD dst2, y, dst1 + // with + // SUB dst1, x, y + // The ADD instruction is not needed in this case; we can use + // one of the inputs of the SUB instead.
+ if (left->IsSub() && left->InputAt(1) == right) { + instruction->ReplaceWith(left->InputAt(0)); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else if (right->IsSub() && right->InputAt(1) == left) { + instruction->ReplaceWith(right->InputAt(0)); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + } } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -1511,6 +1533,29 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { if (TrySubtractionChainSimplification(instruction)) { return; } + + if (left->IsAdd()) { + // Replace code patterns looking like + // ADD dst1, x, y ADD dst1, x, y + // SUB dst2, dst1, y SUB dst2, dst1, x + // with + // ADD dst1, x, y + // The SUB instruction is not needed in this case; we can use + // one of the inputs of the ADD instead. + // It is applicable to integral types only. + DCHECK(Primitive::IsIntegralType(type)); + if (left->InputAt(1) == right) { + instruction->ReplaceWith(left->InputAt(0)); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else if (left->InputAt(0) == right) { + instruction->ReplaceWith(left->InputAt(1)); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + } } void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 579fb9d3bb..bbdcee431a 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1212,7 +1212,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. @@ -1232,7 +1232,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly.
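The VisitAdd/VisitSub hunks above implement the (x - y) + y => x and (x + y) - y => x rewrites. A minimal standalone sketch of the identities being exploited, using plain C++ integers rather than ART's HInstruction graph (names here are illustrative only):

    #include <cstdint>

    // (x - y) + y folds to x, and (x + y) - y folds to x. uint32_t keeps
    // wrap-around well defined, matching the compiler's two's-complement
    // view of integral values.
    uint32_t FoldAddOfSub(uint32_t x, uint32_t y) {
      uint32_t sub = x - y;  // SUB dst1, x, y
      return sub + y;        // ADD dst2, dst1, y -- replaceable by x
    }

    uint32_t FoldSubOfAdd(uint32_t x, uint32_t y) {
      uint32_t add = x + y;  // ADD dst1, x, y
      return add - y;        // SUB dst2, dst1, y -- replaceable by x
    }

The identities do not hold for floating point (NaN, -0.0, rounding), which is why the simplifier gates both rewrites on integral types.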
@@ -1250,7 +1250,7 @@ void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1280,7 +1280,7 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1307,7 +1307,7 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) { void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1665,7 +1665,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); const InvokeRuntimeCallingConvention calling_convention; @@ -1692,7 +1692,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); const InvokeRuntimeCallingConvention calling_convention; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 1d507530aa..16438a7594 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1405,7 +1405,7 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. @@ -1425,7 +1425,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. 
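The kCall -> kCallOnMainOnly rename threaded through these intrinsics files is mechanical; the semantic anchor is the locations.h hunk further down. A simplified sketch of the resulting call kinds and the predicates built on them (a mirror of that hunk, not ART's full LocationSummary):

    // kCallOnMainOnly is the old kCall: the instruction calls the runtime
    // on its main code path, as opposed to calling only from a slow path
    // (kCallOnSlowPath) or not at all (kNoCall).
    enum CallKind { kNoCall, kCallOnSlowPath, kCallOnMainOnly };

    bool CanCall(CallKind k)             { return k != kNoCall; }
    bool WillCall(CallKind k)            { return k == kCallOnMainOnly; }
    bool OnlyCallsOnSlowPath(CallKind k) { return k == kCallOnSlowPath; }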
@@ -1443,7 +1443,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -1473,7 +1473,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -1500,7 +1500,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -1531,7 +1531,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; @@ -1546,7 +1546,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index d4f44d63e2..0bfa02512f 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1875,7 +1875,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -2071,7 +2071,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. 
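Returning to the read barrier change earlier in this diff: the ReadBarrierMarkSlowPathX86_64 comment describes a "compact" convention with one mark entrypoint per core register. A hedged sketch of the idea, with an illustrative function table standing in for ART's thread-local entrypoint slots:

    #include <cstddef>

    using MarkRegFn = void* (*)(void*);  // rX <- ReadBarrierMarkRegX(rX)

    // Hypothetical per-register table; in ART these are consecutive
    // thread-local entrypoint slots indexed by register number.
    MarkRegFn mark_entrypoints[32];

    void* MarkCompact(size_t reg, void* ref) {
      // The standard convention would cost two extra moves (argument in,
      // result out); the per-register entrypoint leaves `ref` in place.
      return mark_entrypoints[reg](ref);
    }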
@@ -2096,7 +2096,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. @@ -2122,7 +2122,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -2155,7 +2155,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -2187,7 +2187,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index cc4971b8f8..a9807bd0d2 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1519,7 +1519,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1707,7 +1707,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. 
@@ -1728,7 +1728,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. @@ -1748,7 +1748,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1783,7 +1783,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1816,7 +1816,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 812bdf550e..6c81421713 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -706,7 +706,7 @@ static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, // We have to fall back to a call to the intrinsic. LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -774,7 +774,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { // We have to fall back to a call to the intrinsic. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); @@ -831,7 +831,7 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); @@ -985,7 +985,7 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); @@ -1216,7 +1216,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1490,7 +1490,7 @@ void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1518,7 +1518,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1543,7 +1543,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 891aaf5ff9..28f1f4f15b 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -526,7 +526,7 @@ static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, // We have to fall back to a call to the intrinsic. 
LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -588,7 +588,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, // We have to fall back to a call to the intrinsic. LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(RAX)); @@ -699,7 +699,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); @@ -839,7 +839,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) { static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); @@ -1303,7 +1303,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1577,7 +1577,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1606,7 +1606,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -1632,7 +1632,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCall, + LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, 
Location::RegisterLocation(calling_convention.GetRegisterAt(0))); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 3f27c911be..7a78bfdc8d 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -481,7 +481,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { enum CallKind { kNoCall, kCallOnSlowPath, - kCall + kCallOnMainOnly }; LocationSummary(HInstruction* instruction, @@ -541,7 +541,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } - bool WillCall() const { return call_kind_ == kCall; } + bool WillCall() const { return call_kind_ == kCallOnMainOnly; } bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; } bool NeedsSafepoint() const { return CanCall(); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d703b0f94f..d6e09d7acb 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -37,6 +37,10 @@ #include "pc_relative_fixups_x86.h" #endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) +#include "x86_memory_gen.h" +#endif + #include "art_method-inl.h" #include "base/arena_allocator.h" #include "base/arena_containers.h" @@ -485,13 +489,27 @@ static void RunArchOptimizations(InstructionSet instruction_set, case kX86: { x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, codegen, stats); + x86::X86MemoryOperandGeneration* memory_gen = + new (arena) x86::X86MemoryOperandGeneration(graph, stats, codegen); HOptimization* x86_optimizations[] = { - pc_relative_fixups + pc_relative_fixups, + memory_gen }; RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer); break; } #endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case kX86_64: { + x86::X86MemoryOperandGeneration* memory_gen = + new (arena) x86::X86MemoryOperandGeneration(graph, stats, codegen); + HOptimization* x86_64_optimizations[] = { + memory_gen + }; + RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer); + break; + } +#endif default: break; } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 9d99668484..1b33408b7e 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1346,9 +1346,15 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { // Find an available spill slot. size_t slot = 0; for (size_t e = spill_slots->size(); slot < e; ++slot) { - if ((*spill_slots)[slot] <= parent->GetStart() - && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) { - break; + if ((*spill_slots)[slot] <= parent->GetStart()) { + if (!parent->NeedsTwoSpillSlots()) { + // One spill slot is sufficient. + break; + } + if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) { + // Two spill slots are available. + break; + } } } diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc new file mode 100644 index 0000000000..195159f61b --- /dev/null +++ b/compiler/optimizing/x86_memory_gen.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "x86_memory_gen.h" +#include "code_generator.h" + +namespace art { +namespace x86 { + +/** + * Replace instructions with memory operand forms. + */ +class MemoryOperandVisitor : public HGraphVisitor { + public: + MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks) + : HGraphVisitor(graph), + do_implicit_null_checks_(do_implicit_null_checks) {} + + private: + void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE { + // Replace the length with the array itself, so that we can compare against memory directly. + HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); + + // We only want to replace an ArrayLength. + if (array_len == nullptr) { + return; + } + + HInstruction* array = array_len->InputAt(0); + DCHECK_EQ(array->GetType(), Primitive::kPrimNot); + + // Don't apply this optimization when the array is nullptr. + if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) { + return; + } + + // Is there a null check that could be an implicit check? + if (array->IsNullCheck() && do_implicit_null_checks_) { + // The ArrayLength may generate the implicit null check. Can the + // bounds check do so as well? + if (array_len->GetNextDisregardingMoves() != check) { + // No, it won't. Leave as is. + return; + } + } + + // Can we suppress the ArrayLength and generate it at the BoundsCheck? + if (array_len->HasOnlyOneNonEnvironmentUse()) { + array_len->MarkEmittedAtUseSite(); + // We need the ArrayLength just before the BoundsCheck. + array_len->MoveBefore(check); + } + } + + bool do_implicit_null_checks_; +}; + +X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen) + : HOptimization(graph, kX86MemoryOperandGenerationPassName, stats), + do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) { +} + +void X86MemoryOperandGeneration::Run() { + MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_); + visitor.VisitInsertionOrder(); +} + +} // namespace x86 +} // namespace art diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h new file mode 100644 index 0000000000..7e886819bb --- /dev/null +++ b/compiler/optimizing/x86_memory_gen.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_ +#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +class CodeGenerator; + +namespace x86 { + +class X86MemoryOperandGeneration : public HOptimization { + public: + X86MemoryOperandGeneration(HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen); + + void Run() OVERRIDE; + + static constexpr const char* kX86MemoryOperandGenerationPassName = + "x86_memory_operand_generation"; + + private: + bool do_implicit_null_checks_; +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
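The X86MemoryOperandGeneration pass above feeds the VisitBoundsCheck changes in code_generator_x86_64.cc: once the ArrayLength is emitted at its use site, the length comparison reads straight from memory (cmpl against the array's length field) instead of first materializing the length in a register. A standalone sketch of the eligibility test, with an assumed stand-in type rather than ART's HInstruction API:

    // A stand-in instruction node (illustrative, not ART's class).
    struct Insn {
      bool is_array_length = false;
      bool has_single_non_env_use = false;
      bool input_is_null_check = false;
      Insn* next = nullptr;  // next instruction, disregarding moves
    };

    // The ArrayLength can fold into its BoundsCheck as a memory operand
    // when it has a single non-environment use; when implicit null checks
    // are enabled and the array input carries a null check, the length
    // must also sit directly before the check so the implicit null check
    // can be attributed to the folded compare.
    bool CanFoldArrayLength(const Insn* len, const Insn* check,
                            bool implicit_null_checks) {
      if (!len->is_array_length || !len->has_single_non_env_use) return false;
      if (implicit_null_checks && len->input_is_null_check && len->next != check) {
        return false;
      }
      return true;
    }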
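Similarly, the register_allocator.cc hunk earlier fixes the spill-slot search so that an interval needing two slots only stops at a slot whose successor is also free. A standalone sketch of the corrected loop (types simplified; spill_slots[i] holds the position at which slot i becomes free):

    #include <cstddef>
    #include <vector>

    size_t FindSpillSlot(const std::vector<size_t>& spill_slots,
                         size_t start,
                         bool needs_two_slots) {
      size_t slot = 0;
      for (size_t e = spill_slots.size(); slot < e; ++slot) {
        if (spill_slots[slot] <= start) {
          if (!needs_two_slots) {
            break;  // one free slot is sufficient
          }
          if (slot == e - 1 || spill_slots[slot + 1] <= start) {
            break;  // a free pair (or room to grow past the end) is available
          }
        }
      }
      return slot;  // == spill_slots.size() when new slots must be appended
    }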