Enable the register allocator on ARM.
- Also fixes a few bugs and wrong assumptions in code paths not exercised
  by x86.
- We need to differentiate between moves that connect siblings within a
  block and moves required for control flow resolution.
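
For context, the gist of that differentiation, condensed from the
register_allocator.cc hunks below (illustrative excerpt only, not a
standalone snippet; HParallelMove, MoveOperands and the surrounding
allocator are the optimizing compiler's own types):

    // Reuse an existing parallel move only if it was created for this
    // exact lifetime position. Input moves, sibling-connecting moves and
    // block-connecting moves now carry distinct positions, so a mismatch
    // means a fresh HParallelMove must be inserted rather than merging
    // into an unrelated one.
    HParallelMove* move = at->GetNext()->AsParallelMove();
    if (move == nullptr || move->GetLifetimePosition() != position) {
      move = new (allocator_) HParallelMove(allocator_);
      move->SetLifetimePosition(position);
      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
    }
    move->AddMove(new (allocator_) MoveOperands(source, destination));
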
Change-Id: Idd05cf138a71c8f36f5531c473de613c0166fe38
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 82fa639..11b3a33 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -67,8 +67,7 @@
// Note that this follows the current calling convention.
return GetFrameSize()
+ kVRegSize // Art method
- + (parameter->GetIndex() - graph_->GetNumberOfVRegs() + graph_->GetNumberOfInVRegs())
- * kVRegSize;
+ + parameter->GetIndex() * kVRegSize;
}
virtual void GenerateFrameEntry() = 0;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d61df36..2aebf9a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -48,7 +48,8 @@
CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
: CodeGenerator(graph, kNumberOfRegIds),
location_builder_(graph, this),
- instruction_visitor_(graph, this) {}
+ instruction_visitor_(graph, this),
+ move_resolver_(graph->GetArena(), this) {}
static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
return blocked_registers + kNumberOfAllocIds;
@@ -106,6 +107,9 @@
// Reserve thread register.
blocked_registers[TR] = true;
+ // Reserve temp register.
+ blocked_registers[IP] = true;
+
// TODO: We currently don't use Quick's callee saved registers.
blocked_registers[R5] = true;
blocked_registers[R6] = true;
@@ -254,8 +258,8 @@
if (source.IsRegister()) {
__ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
} else {
- __ ldr(R0, Address(SP, source.GetStackIndex()));
- __ str(R0, Address(SP, destination.GetStackIndex()));
+ __ ldr(IP, Address(SP, source.GetStackIndex()));
+ __ str(IP, Address(SP, destination.GetStackIndex()));
}
}
}
@@ -295,8 +299,8 @@
} else {
DCHECK(source.IsDoubleStackSlot());
__ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
- __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
- __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+ __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+ __ str(IP, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
}
} else {
DCHECK(destination.IsDoubleStackSlot());
@@ -313,15 +317,15 @@
uint32_t argument_index = source.GetQuickParameterIndex();
__ str(calling_convention.GetRegisterAt(argument_index),
Address(SP, destination.GetStackIndex()));
- __ ldr(R0,
+ __ ldr(IP,
Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
- __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+ __ str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
} else {
DCHECK(source.IsDoubleStackSlot());
- __ ldr(R0, Address(SP, source.GetStackIndex()));
- __ str(R0, Address(SP, destination.GetStackIndex()));
- __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
- __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+ __ ldr(IP, Address(SP, source.GetStackIndex()));
+ __ str(IP, Address(SP, destination.GetStackIndex()));
+ __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+ __ str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
}
}
}
@@ -332,8 +336,8 @@
if (location.IsRegister()) {
__ LoadImmediate(location.AsArm().AsCoreRegister(), value);
} else {
- __ LoadImmediate(R0, value);
- __ str(R0, Address(SP, location.GetStackIndex()));
+ __ LoadImmediate(IP, value);
+ __ str(IP, Address(SP, location.GetStackIndex()));
}
} else if (instruction->AsLongConstant() != nullptr) {
int64_t value = instruction->AsLongConstant()->GetValue();
@@ -341,10 +345,10 @@
__ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value));
__ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value));
} else {
- __ LoadImmediate(R0, Low32Bits(value));
- __ str(R0, Address(SP, location.GetStackIndex()));
- __ LoadImmediate(R0, High32Bits(value));
- __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize)));
+ __ LoadImmediate(IP, Low32Bits(value));
+ __ str(IP, Address(SP, location.GetStackIndex()));
+ __ LoadImmediate(IP, High32Bits(value));
+ __ str(IP, Address(SP, location.GetHighStackIndex(kArmWordSize)));
}
} else if (instruction->AsLoadLocal() != nullptr) {
uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
@@ -493,7 +497,7 @@
}
void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
- // Will be generated at use site.
+ codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
}
void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
@@ -564,7 +568,7 @@
void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(ArmCoreLocation(R0));
InvokeDexCallingConventionVisitor calling_convention_visitor;
for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -811,15 +815,93 @@
}
void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
- LOG(FATAL) << "Unimplemented";
+ LOG(FATAL) << "Unreachable";
}
void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
- LOG(FATAL) << "Unimplemented";
+ LOG(FATAL) << "Unreachable";
}
void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) {
- LOG(FATAL) << "Unimplemented";
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+ArmAssembler* ParallelMoveResolverARM::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverARM::EmitMove(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ Location source = move->GetSource();
+ Location destination = move->GetDestination();
+
+ if (source.IsRegister()) {
+ if (destination.IsRegister()) {
+ __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ StoreToOffset(kStoreWord, source.AsArm().AsCoreRegister(),
+ SP, destination.GetStackIndex());
+ }
+ } else if (source.IsStackSlot()) {
+ if (destination.IsRegister()) {
+ __ LoadFromOffset(kLoadWord, destination.AsArm().AsCoreRegister(),
+ SP, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ }
+ } else {
+ LOG(FATAL) << "Unimplemented";
+ }
+}
+
+void ParallelMoveResolverARM::Exchange(Register reg, int mem) {
+ __ Mov(IP, reg);
+ __ LoadFromOffset(kLoadWord, reg, SP, mem);
+ __ StoreToOffset(kStoreWord, IP, SP, mem);
+}
+
+void ParallelMoveResolverARM::Exchange(int mem1, int mem2) {
+ ScratchRegisterScope ensure_scratch(this, IP, R0, codegen_->GetNumberOfCoreRegisters());
+ int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+ __ LoadFromOffset(kLoadWord, static_cast<Register>(ensure_scratch.GetRegister()),
+ SP, mem1 + stack_offset);
+ __ LoadFromOffset(kLoadWord, IP, SP, mem2 + stack_offset);
+ __ StoreToOffset(kStoreWord, static_cast<Register>(ensure_scratch.GetRegister()),
+ SP, mem2 + stack_offset);
+ __ StoreToOffset(kStoreWord, IP, SP, mem1 + stack_offset);
+}
+
+void ParallelMoveResolverARM::EmitSwap(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ Location source = move->GetSource();
+ Location destination = move->GetDestination();
+
+ if (source.IsRegister() && destination.IsRegister()) {
+ DCHECK_NE(source.AsArm().AsCoreRegister(), IP);
+ DCHECK_NE(destination.AsArm().AsCoreRegister(), IP);
+ __ Mov(IP, source.AsArm().AsCoreRegister());
+ __ Mov(source.AsArm().AsCoreRegister(), destination.AsArm().AsCoreRegister());
+ __ Mov(destination.AsArm().AsCoreRegister(), IP);
+ } else if (source.IsRegister() && destination.IsStackSlot()) {
+ Exchange(source.AsArm().AsCoreRegister(), destination.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsRegister()) {
+ Exchange(destination.AsArm().AsCoreRegister(), source.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+ Exchange(source.GetStackIndex(), destination.GetStackIndex());
+ } else {
+ LOG(FATAL) << "Unimplemented";
+ }
+}
+
+void ParallelMoveResolverARM::SpillScratch(int reg) {
+ __ Push(static_cast<Register>(reg));
+}
+
+void ParallelMoveResolverARM::RestoreScratch(int reg) {
+ __ Pop(static_cast<Register>(reg));
}
} // namespace arm
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ac5ef21..712a24c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -19,6 +19,7 @@
#include "code_generator.h"
#include "nodes.h"
+#include "parallel_move_resolver.h"
#include "utils/arm/assembler_arm32.h"
namespace art {
@@ -59,6 +60,27 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
+class ParallelMoveResolverARM : public ParallelMoveResolver {
+ public:
+ ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
+ : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+ virtual void EmitMove(size_t index) OVERRIDE;
+ virtual void EmitSwap(size_t index) OVERRIDE;
+ virtual void SpillScratch(int reg) OVERRIDE;
+ virtual void RestoreScratch(int reg) OVERRIDE;
+
+ ArmAssembler* GetAssembler() const;
+
+ private:
+ void Exchange(Register reg, int mem);
+ void Exchange(int mem1, int mem2);
+
+ CodeGeneratorARM* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
+};
+
class LocationsBuilderARM : public HGraphVisitor {
public:
explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -145,6 +167,10 @@
virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ ParallelMoveResolverARM* GetMoveResolver() {
+ return &move_resolver_;
+ }
+
private:
// Helper method to move a 32bits value between two locations.
void Move32(Location destination, Location source);
@@ -153,6 +179,7 @@
LocationsBuilderARM location_builder_;
InstructionCodeGeneratorARM instruction_visitor_;
+ ParallelMoveResolverARM move_resolver_;
Arm32Assembler assembler_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c7dca86..f24af5b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -847,7 +847,7 @@
void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
__ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset));
__ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister()));
@@ -879,7 +879,10 @@
}
void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- ScratchRegisterScope ensure_scratch(this, reg, codegen_->GetNumberOfCoreRegisters());
+ Register suggested_scratch = reg == EAX ? EBX : EAX;
+ ScratchRegisterScope ensure_scratch(
+ this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
__ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
__ movl(Address(ESP, mem + stack_offset), reg);
@@ -889,9 +892,12 @@
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch1(
- this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+ Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
ScratchRegisterScope ensure_scratch2(
- this, ensure_scratch1.GetRegister(), codegen_->GetNumberOfCoreRegisters());
+ this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
__ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 4a1b6ce..cadd3c5 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -163,7 +163,11 @@
return false;
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_count, bool* spilled) {
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+ int register_count,
+ int if_scratch,
+ bool* spilled) {
+ DCHECK_NE(blocked, if_scratch);
int scratch = -1;
for (int reg = 0; reg < register_count; ++reg) {
if ((blocked != reg) &&
@@ -175,11 +179,7 @@
if (scratch == -1) {
*spilled = true;
- for (int reg = 0; reg < register_count; ++reg) {
- if (blocked != reg) {
- scratch = reg;
- }
- }
+ scratch = if_scratch;
} else {
*spilled = false;
}
@@ -189,11 +189,11 @@
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+ ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
reg_(kNoRegister),
spilled_(false) {
- reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, &spilled_);
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
if (spilled_) {
resolver->SpillScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e1189d8..fcc1de6 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,7 +42,10 @@
protected:
class ScratchRegisterScope : public ValueObject {
public:
- ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int number_of_registers);
+ ScratchRegisterScope(ParallelMoveResolver* resolver,
+ int blocked,
+ int if_scratch,
+ int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -55,7 +58,7 @@
};
bool IsScratchLocation(Location loc);
- int AllocateScratchRegister(int blocked, int register_count, bool* spilled);
+ int AllocateScratchRegister(int blocked, int register_count, int if_scratch, bool* spilled);
// Emit a move.
virtual void EmitMove(size_t index) = 0;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c2a4769..348e9d4 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -651,7 +651,9 @@
// Move must happen after the instruction.
DCHECK(!at->IsControlFlow());
move = at->GetNext()->AsParallelMove();
- if (move == nullptr || IsInputMove(move)) {
+ // This is a parallel move for connecting siblings in the same block. We need to
+ // differentiate it from moves for connecting blocks and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(position);
at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -660,7 +662,9 @@
// Move must happen before the instruction.
HInstruction* previous = at->GetPrevious();
if (previous != nullptr && previous->AsParallelMove() != nullptr) {
- if (IsInputMove(previous)) {
+ // This is a parallel move for connecting siblings in the same block. We need to
+ // differentiate it from moves for connecting blocks and from input moves.
+ if (previous->GetLifetimePosition() != position) {
previous = previous->GetPrevious();
}
}
@@ -684,8 +688,12 @@
HInstruction* last = block->GetLastInstruction();
HInstruction* previous = last->GetPrevious();
HParallelMove* move;
- if (previous == nullptr || previous->AsParallelMove() == nullptr) {
+ // This is a parallel move for connecting blocks. We need to differentiate
+ // it from moves for connecting siblings in the same block and from output moves.
+ if (previous == nullptr || previous->AsParallelMove() == nullptr
+ || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) {
move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(block->GetLifetimeEnd());
block->InsertInstructionBefore(move, last);
} else {
move = previous->AsParallelMove();
@@ -700,7 +708,9 @@
HInstruction* first = block->GetFirstInstruction();
HParallelMove* move = first->AsParallelMove();
- if (move == nullptr || IsInputMove(move)) {
+ // This is a parallel move for connecting blocks. We need to differentiate
+ // it from moves for connecting siblings in the same block and from input moves.
+ if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(block->GetLifetimeStart());
block->InsertInstructionBefore(move, first);
@@ -718,9 +728,14 @@
return;
}
+ size_t position = instruction->GetLifetimePosition() + 1;
HParallelMove* move = instruction->GetNext()->AsParallelMove();
- if (move == nullptr || IsInputMove(move)) {
+ // This is a parallel move for moving the output of an instruction. We need
+ // to differentiate it from input moves, from moves for connecting siblings in
+ // the same block, and from moves for connecting blocks.
+ if (move == nullptr || move->GetLifetimePosition() != position) {
move = new (allocator_) HParallelMove(allocator_);
+ move->SetLifetimePosition(position);
instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
}
move->AddMove(new (allocator_) MoveOperands(source, destination));
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 1b5585f..8b7c4f1 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -65,7 +65,7 @@
static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
static bool Supports(InstructionSet instruction_set) {
- return instruction_set == kX86;
+ return instruction_set == kX86 || instruction_set == kArm;
}
size_t GetNumberOfSpillSlots() const {