Diffstat (limited to 'compiler/optimizing')
-rw-r--r--   compiler/optimizing/code_generator.cc         | 66
-rw-r--r--   compiler/optimizing/code_generator.h           |  1
-rw-r--r--   compiler/optimizing/code_generator_arm.cc      | 21
-rw-r--r--   compiler/optimizing/code_generator_arm64.cc    |  3
-rw-r--r--   compiler/optimizing/code_generator_arm64.h     |  3
-rw-r--r--   compiler/optimizing/code_generator_x86.cc      | 71
-rw-r--r--   compiler/optimizing/code_generator_x86_64.cc   | 12
7 files changed, 77 insertions, 100 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6f424ce11d..b3e99f9c0f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -177,6 +177,31 @@ int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
   }
 }
 
+void CodeGenerator::MaybeBlockPhysicalRegisters(Location loc, bool is_output) const {
+  // The DCHECKS below check that a register is not specified twice in
+  // the summary.
+  // Note that fixed output registers are allowed to overlap with fixed input and
+  // temp registers: the writer of the location summary has to make sure they
+  // don't conflict with each other.
+  if (loc.IsRegister()) {
+    DCHECK(is_output || !blocked_core_registers_[loc.reg()]);
+    blocked_core_registers_[loc.reg()] = true;
+  } else if (loc.IsFpuRegister()) {
+    DCHECK(is_output || !blocked_fpu_registers_[loc.reg()]);
+    blocked_fpu_registers_[loc.reg()] = true;
+  } else if (loc.IsFpuRegisterPair()) {
+    DCHECK(is_output || !blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]);
+    blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true;
+    DCHECK(is_output || !blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]);
+    blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true;
+  } else if (loc.IsRegisterPair()) {
+    DCHECK(is_output || !blocked_core_registers_[loc.AsRegisterPairLow<int>()]);
+    blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true;
+    DCHECK(is_output || !blocked_core_registers_[loc.AsRegisterPairHigh<int>()]);
+    blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true;
+  }
+}
+
 void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
   LocationSummary* locations = instruction->GetLocations();
   if (locations == nullptr) return;
@@ -196,43 +221,18 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
   // Mark all fixed input, temp and output registers as used.
   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     Location loc = locations->InAt(i);
-    // The DCHECKS below check that a register is not specified twice in
-    // the summary.
-    if (loc.IsRegister()) {
-      DCHECK(!blocked_core_registers_[loc.reg()]);
-      blocked_core_registers_[loc.reg()] = true;
-    } else if (loc.IsFpuRegister()) {
-      DCHECK(!blocked_fpu_registers_[loc.reg()]);
-      blocked_fpu_registers_[loc.reg()] = true;
-    } else if (loc.IsFpuRegisterPair()) {
-      DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]);
-      blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true;
-      DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]);
-      blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true;
-    } else if (loc.IsRegisterPair()) {
-      DCHECK(!blocked_core_registers_[loc.AsRegisterPairLow<int>()]);
-      blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true;
-      DCHECK(!blocked_core_registers_[loc.AsRegisterPairHigh<int>()]);
-      blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true;
-    }
+    MaybeBlockPhysicalRegisters(loc, false);
   }
 
   for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
     Location loc = locations->GetTemp(i);
-    // The DCHECKS below check that a register is not specified twice in
-    // the summary.
-    if (loc.IsRegister()) {
-      DCHECK(!blocked_core_registers_[loc.reg()]);
-      blocked_core_registers_[loc.reg()] = true;
-    } else if (loc.IsFpuRegister()) {
-      DCHECK(!blocked_fpu_registers_[loc.reg()]);
-      blocked_fpu_registers_[loc.reg()] = true;
-    } else {
-      DCHECK(loc.GetPolicy() == Location::kRequiresRegister
-             || loc.GetPolicy() == Location::kRequiresFpuRegister);
-    }
+    MaybeBlockPhysicalRegisters(loc, false);
   }
 
+  // If the output is a fixed register, mark it as used.
+  Location result_location = locations->Out();
+  MaybeBlockPhysicalRegisters(result_location, true);
+
   SetupBlockedRegisters();
 
   // Allocate all unallocated input locations.
@@ -276,9 +276,11 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
             << loc.GetPolicy();
       }
       locations->SetTempAt(i, loc);
+    } else {
+      DCHECK(loc.IsFpuRegister() || loc.IsRegister());
     }
   }
-  Location result_location = locations->Out();
+
   if (result_location.IsUnallocated()) {
     switch (result_location.GetPolicy()) {
       case Location::kAny:
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 1d42c47d56..e03bc53391 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -211,6 +211,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
 
   // Register allocation logic.
   void AllocateRegistersLocally(HInstruction* instruction) const;
+  void MaybeBlockPhysicalRegisters(Location loc, bool is_output) const;
 
   // Backend specific implementation for allocating a register.
   virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 002d9d4449..43dc4e5f1c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -38,7 +38,7 @@ static DRegister FromLowSToD(SRegister reg) {
 
 static constexpr bool kExplicitStackOverflowCheck = false;
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;  // LR.
 static constexpr int kCurrentMethodStackOffset = 0;
 
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
@@ -444,21 +444,16 @@ void CodeGeneratorARM::SetupBlockedRegisters() const {
   blocked_core_registers_[LR] = true;
   blocked_core_registers_[PC] = true;
 
+  // Reserve frame pointer register.
+  blocked_core_registers_[FP] = true;
+
   // Reserve thread register.
   blocked_core_registers_[TR] = true;
 
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  // TODO: We currently don't use Quick's callee saved registers.
-  // We always save and restore R6 and R7 to make sure we can use three
-  // register pairs for long operations.
-  blocked_core_registers_[R4] = true;
-  blocked_core_registers_[R5] = true;
-  blocked_core_registers_[R8] = true;
-  blocked_core_registers_[R10] = true;
-  blocked_core_registers_[R11] = true;
-
+  // Reserve hard-float's callee saved registers.
   blocked_fpu_registers_[S16] = true;
   blocked_fpu_registers_[S17] = true;
   blocked_fpu_registers_[S18] = true;
@@ -513,8 +508,8 @@ void CodeGeneratorARM::GenerateFrameEntry() {
     }
   }
 
-  core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
-  __ PushList(1 << LR | 1 << R6 | 1 << R7);
+  core_spill_mask_ |= (1 << LR);
+  __ Push(LR);
 
   // The return PC has already been pushed on the stack.
   __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
@@ -523,7 +518,7 @@ void CodeGeneratorARM::GenerateFrameEntry() {
 
 void CodeGeneratorARM::GenerateFrameExit() {
   __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
-  __ PopList(1 << PC | 1 << R6 | 1 << R7);
+  __ Pop(PC);
 }
 
 void CodeGeneratorARM::Bind(HBasicBlock* block) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c7517d3abc..bfb7414995 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -716,11 +716,8 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
   //   tr
   //   lr
   //   sp is not part of the allocatable registers, so we don't need to block it.
-  // TODO: Avoid blocking callee-saved registers, and instead preserve them
-  // where necessary.
   CPURegList reserved_core_registers = vixl_reserved_core_registers;
   reserved_core_registers.Combine(runtime_reserved_core_registers);
-  reserved_core_registers.Combine(quick_callee_saved_registers);
   while (!reserved_core_registers.IsEmpty()) {
     blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
   }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1d5bfb734e..38e2965b2c 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -48,9 +48,6 @@ const vixl::Register tr = vixl::x18;  // Thread Register
 const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
 const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
 const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
-const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
-                                                    vixl::kXRegSize,
-                                                    kArm64CalleeSaveRefSpills);
 
 Location ARM64ReturnLocation(Primitive::Type return_type);
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e7edd8a805..2cba637d67 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,6 +42,12 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength =
 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
 static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
+static constexpr Register kByteRegisters[] = { EAX, ECX, EDX, EBX };
+
+static ByteRegister ToByteRegister(Register reg) {
+  return X86ManagedRegister::FromCpuRegister(reg).AsByteRegister();
+}
+
 // Marker for places that can be updated once we don't follow the quick ABI.
 static constexpr bool kFollowsQuickABI = true;
 
@@ -437,11 +443,8 @@ void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  // TODO: We currently don't use Quick's callee saved registers.
-  DCHECK(kFollowsQuickABI);
+  // Frame register is always reserved.
   blocked_core_registers_[EBP] = true;
-  blocked_core_registers_[ESI] = true;
-  blocked_core_registers_[EDI] = true;
 
   UpdateBlockedPairRegisters();
 }
@@ -929,7 +932,7 @@ void LocationsBuilderX86::VisitCondition(HCondition* comp) {
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::Any());
   if (comp->NeedsMaterialization()) {
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RegisterLocation(kByteRegisters[0]));
   }
 }
 
@@ -950,7 +953,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
       __ cmpl(locations->InAt(0).AsRegister<Register>(),
              Address(ESP, locations->InAt(1).GetStackIndex()));
     }
-    __ setb(X86Condition(comp->GetCondition()), reg);
+    __ setb(X86Condition(comp->GetCondition()), ToByteRegister(reg));
   }
 }
 
@@ -1165,11 +1168,11 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      locations->SetOut(Location::RegisterLocation(EAX));
+      locations->SetOut(Location::RegisterLocation(EAX), Location::kNoOutputOverlap);
       break;
 
     case Primitive::kPrimLong:
-      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kNoOutputOverlap);
       break;
 
    case Primitive::kPrimVoid:
@@ -1177,7 +1180,7 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
 
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat:
-      locations->SetOut(Location::FpuRegisterLocation(XMM0));
+      locations->SetOut(Location::FpuRegisterLocation(XMM0), Location::kNoOutputOverlap);
       break;
   }
 
@@ -1347,7 +1350,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-byte' instruction.
-          locations->SetInAt(0, Location::Any());
+          locations->SetInAt(0, Location::RegisterLocation(kByteRegisters[0]));
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
 
@@ -1542,15 +1545,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-byte' instruction.
-          if (in.IsRegister()) {
-            __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
-          } else if (in.IsStackSlot()) {
-            __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
-          } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
-            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
-          }
+          __ movsxb(out.AsRegister<Register>(), ToByteRegister(in.AsRegister<Register>()));
           break;
 
         default:
@@ -2668,17 +2663,16 @@ void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction)
       || (field_type == Primitive::kPrimByte);
   // The register allocator does not support multiple
   // inputs that die at entry with one in a specific register.
+  size_t byte_register_index = 0;
   if (is_byte_type) {
-    // Ensure the value is in a byte register.
-    locations->SetInAt(1, Location::RegisterLocation(EAX));
+    locations->SetInAt(1, Location::RegisterLocation(kByteRegisters[byte_register_index++]));
   } else {
     locations->SetInAt(1, Location::RequiresRegister());
   }
   // Temporary registers for the write barrier.
   if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
+    locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
   }
 }
 
@@ -2691,7 +2685,7 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
+      ByteRegister value = ToByteRegister(locations->InAt(1).AsRegister<Register>());
       __ movb(Address(obj, offset), value);
       break;
     }
@@ -2741,15 +2735,17 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr
   }
 }
 
-void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
+void CodeGeneratorX86::MarkGCCard(Register temp,
+                                  Register card,
+                                  Register object,
+                                  Register value) {
   Label is_null;
   __ testl(value, value);
   __ j(kEqual, &is_null);
   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
   __ movl(temp, object);
   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
-  __ movb(Address(temp, card, TIMES_1, 0),
-          X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
+  __ movb(Address(temp, card, TIMES_1, 0), ToByteRegister(card));
   __ Bind(&is_null);
 }
 
@@ -2980,17 +2976,17 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
     // inputs that die at entry with one in a specific register.
     locations->SetInAt(0, Location::RequiresRegister());
     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    size_t byte_register_index = 0;
    if (is_byte_type) {
-      // Ensure the value is in a byte register.
-      locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
+      locations->SetInAt(2, Location::ByteRegisterOrConstant(
+          kByteRegisters[byte_register_index++], instruction->InputAt(2)));
     } else {
       locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
     // Temporary registers for the write barrier.
     if (needs_write_barrier) {
       locations->AddTemp(Location::RequiresRegister());
-      // Ensure the card is in a byte register.
-      locations->AddTemp(Location::RegisterLocation(ECX));
+      locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
     }
   }
 }
@@ -3012,7 +3008,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
       if (index.IsConstant()) {
        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
+          __ movb(Address(obj, offset), ToByteRegister(value.AsRegister<Register>()));
         } else {
           __ movb(Address(obj, offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
@@ -3020,7 +3016,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
       } else {
         if (value.IsRegister()) {
           __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  value.AsRegister<ByteRegister>());
+                  ToByteRegister(value.AsRegister<Register>()));
         } else {
           __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
@@ -3463,17 +3459,16 @@ void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
       || (field_type == Primitive::kPrimByte);
   // The register allocator does not support multiple
   // inputs that die at entry with one in a specific register.
+  size_t byte_register_index = 0;
   if (is_byte_type) {
-    // Ensure the value is in a byte register.
-    locations->SetInAt(1, Location::RegisterLocation(EAX));
+    locations->SetInAt(1, Location::RegisterLocation(kByteRegisters[byte_register_index++]));
   } else {
     locations->SetInAt(1, Location::RequiresRegister());
   }
   // Temporary registers for the write barrier.
   if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
+    locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
   }
 }
 
@@ -3486,7 +3481,7 @@ void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instructi
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
+      ByteRegister value = ToByteRegister(locations->InAt(1).AsRegister<Register>());
       __ movb(Address(cls, offset), value);
       break;
     }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff7fcdcbac..36f14b5a72 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -453,18 +453,8 @@ void CodeGeneratorX86_64::SetupBlockedRegisters() const {
   // Block the register used as TMP.
   blocked_core_registers_[TMP] = true;
 
-  // TODO: We currently don't use Quick's callee saved registers.
-  blocked_core_registers_[RBX] = true;
+  // Block the frame pointer.
   blocked_core_registers_[RBP] = true;
-  blocked_core_registers_[R12] = true;
-  blocked_core_registers_[R13] = true;
-  blocked_core_registers_[R14] = true;
-  blocked_core_registers_[R15] = true;
-
-  blocked_fpu_registers_[XMM12] = true;
-  blocked_fpu_registers_[XMM13] = true;
-  blocked_fpu_registers_[XMM14] = true;
-  blocked_fpu_registers_[XMM15] = true;
 }
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {