Opt compiler: Add arm64 support for register allocation.
Change-Id: Idc6e84eee66170de4a9c0a5844c3da038c083aa7
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a61ef2d..b048c07 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -90,10 +90,12 @@
}
Register XRegisterFrom(Location location) {
+ DCHECK(location.IsRegister());
return Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
}
Register WRegisterFrom(Location location) {
+ DCHECK(location.IsRegister());
return Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
}
@@ -112,10 +114,12 @@
}
FPRegister DRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
return FPRegister::DRegFromCode(location.reg());
}
FPRegister SRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
return FPRegister::SRegFromCode(location.reg());
}
@@ -133,6 +137,11 @@
instr->InputAt(input_index)->GetType());
}
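+// Returns a CPURegister view of `location`: an FP register for floating point
+// types, a core register otherwise.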
+CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+ return IsFPType(type) ? CPURegister(FPRegisterFrom(location, type))
+ : CPURegister(RegisterFrom(location, type));
+}
+
CPURegister OutputCPURegister(HInstruction* instr) {
return IsFPType(instr->GetType()) ? static_cast<CPURegister>(OutputFPRegister(instr))
: static_cast<CPURegister>(OutputRegister(instr));
@@ -266,14 +275,32 @@
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
- BoundsCheckSlowPathARM64() {}
+ BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
+ Location index_location,
+ Location length_location)
+ : instruction_(instruction),
+ index_location_(index_location),
+ length_location_(length_location) {}
+
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- __ Brk(__LINE__); // TODO: Unimplemented BoundsCheckSlowPathARM64.
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(
+ index_location_, LocationFrom(calling_convention.GetRegisterAt(0)),
+ length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
+ arm64_codegen->InvokeRuntime(
+ QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc());
}
private:
+ HBoundsCheck* const instruction_;
+ const Location index_location_;
+ const Location length_location_;
+
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};
@@ -322,7 +349,7 @@
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = at_->GetType();
- arm64_codegen->MoveHelper(out, calling_convention.GetReturnLocation(type), type);
+ arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
}
codegen->RestoreLiveRegisters(locations);
@@ -364,7 +391,7 @@
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
Primitive::Type type = instruction_->GetType();
- arm64_codegen->MoveHelper(locations->Out(), calling_convention.GetReturnLocation(type), type);
+ arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
codegen->RestoreLiveRegisters(locations);
__ B(GetExitLabel());
@@ -445,15 +472,51 @@
class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
- TypeCheckSlowPathARM64() {}
+ TypeCheckSlowPathARM64(HInstruction* instruction,
+ Location class_to_check,
+ Location object_class,
+ uint32_t dex_pc)
+ : instruction_(instruction),
+ class_to_check_(class_to_check),
+ object_class_(object_class),
+ dex_pc_(dex_pc) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(instruction_->IsCheckCast()
+ || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
__ Bind(GetEntryLabel());
- __ Brk(__LINE__); // TODO: Unimplemented TypeCheckSlowPathARM64.
+ codegen->SaveLiveRegisters(locations);
+
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(
+ class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)),
+ object_class_, LocationFrom(calling_convention.GetRegisterAt(1)));
+
+ if (instruction_->IsInstanceOf()) {
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_);
+ Primitive::Type ret_type = instruction_->GetType();
+ Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+ arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+ } else {
+ DCHECK(instruction_->IsCheckCast());
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+ }
+
+ codegen->RestoreLiveRegisters(locations);
__ B(GetExitLabel());
}
private:
+ HInstruction* const instruction_;
+ const Location class_to_check_;
+ const Location object_class_;
+ uint32_t dex_pc_;
+
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};
@@ -487,7 +550,8 @@
kNumberOfAllocatableRegisterPairs),
block_labels_(nullptr),
location_builder_(graph, this),
- instruction_visitor_(graph, this) {}
+ instruction_visitor_(graph, this),
+ move_resolver_(graph->GetArena(), this) {}
#undef __
#define __ GetVIXLAssembler()->
@@ -498,6 +562,24 @@
CodeGenerator::Finalize(allocator);
}
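+// The parallel move resolver defers the actual work to the code generator. No
+// type is passed, so MoveLocation() infers the width of each move from its
+// source and destination locations.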
+void ParallelMoveResolverARM64::EmitMove(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource());
+}
+
+void ParallelMoveResolverARM64::EmitSwap(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->SwapLocations(move->GetDestination(), move->GetSource());
+}
+
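+// Core scratch registers are preserved across their use with a 64-bit
+// push/pop pair.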
+void ParallelMoveResolverARM64::RestoreScratch(int reg) {
+ __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize));
+}
+
+void ParallelMoveResolverARM64::SpillScratch(int reg) {
+ __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize));
+}
+
void CodeGeneratorARM64::GenerateFrameEntry() {
bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
if (do_overflow_check) {
@@ -571,18 +653,18 @@
}
} else if (instruction->IsTemporary()) {
Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
- MoveHelper(location, temp_location, type);
+ MoveLocation(location, temp_location, type);
} else if (instruction->IsLoadLocal()) {
uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
if (Is64BitType(type)) {
- MoveHelper(location, Location::DoubleStackSlot(stack_slot), type);
+ MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
} else {
- MoveHelper(location, Location::StackSlot(stack_slot), type);
+ MoveLocation(location, Location::StackSlot(stack_slot), type);
}
} else {
DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
- MoveHelper(location, locations->Out(), type);
+ MoveLocation(location, locations->Out(), type);
}
}
@@ -665,6 +747,30 @@
}
}
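+// Register save/restore helpers used when spilling live registers around slow
+// paths. Core registers are stored as 64-bit X registers and floating point
+// registers as 64-bit D registers; each one occupies a single kArm64WordSize
+// stack slot.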
+size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
+ __ Str(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
+ __ Ldr(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ FPRegister reg = FPRegister(reg_id, kDRegSize);
+ __ Str(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ FPRegister reg = FPRegister(reg_id, kDRegSize);
+ __ Ldr(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Arm64ManagedRegister::FromXRegister(XRegister(reg));
}
@@ -686,58 +792,162 @@
}
}
-void CodeGeneratorARM64::MoveHelper(Location destination,
- Location source,
- Primitive::Type type) {
+
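+// Used in DCHECKs: returns whether a constant location is consistent with the
+// primitive type requested for the move.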
+static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
+ DCHECK(constant.IsConstant());
+ HConstant* cst = constant.GetConstant();
+ return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
+ (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
+ (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
+ (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
+}
+
+void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Primitive::Type type) {
if (source.Equals(destination)) {
return;
}
- if (destination.IsRegister()) {
- Register dst = RegisterFrom(destination, type);
- if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
- DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
- __ Ldr(dst, StackOperandFrom(source));
- } else {
- __ Mov(dst, OperandFrom(source, type));
- }
- } else if (destination.IsFpuRegister()) {
- FPRegister dst = FPRegisterFrom(destination, type);
- if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
- DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
- __ Ldr(dst, StackOperandFrom(source));
- } else if (source.IsFpuRegister()) {
- __ Fmov(dst, FPRegisterFrom(source, type));
- } else {
- MoveConstant(dst, source.GetConstant());
- }
- } else {
- DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
- if (source.IsRegister()) {
- __ Str(RegisterFrom(source, type), StackOperandFrom(destination));
- } else if (source.IsFpuRegister()) {
- __ Str(FPRegisterFrom(source, type), StackOperandFrom(destination));
- } else if (source.IsConstant()) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- HConstant* cst = source.GetConstant();
- CPURegister temp;
- if (cst->IsIntConstant() || cst->IsLongConstant()) {
- temp = cst->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+
+ // A valid move can always be inferred from the destination and source
+ // locations. When moving from and to a register, the argument type can be
+ // used to generate 32bit instead of 64bit moves. In debug mode we also
+ // check the coherency of the locations and the type.
+ bool unspecified_type = (type == Primitive::kPrimVoid);
+
+ if (destination.IsRegister() || destination.IsFpuRegister()) {
+ if (unspecified_type) {
+ HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
+ if (source.IsStackSlot() ||
+ (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) {
+ // For stack slots and 32bit constants, a 32bit type is appropriate.
+ type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
} else {
- DCHECK(cst->IsFloatConstant() || cst->IsDoubleConstant());
- temp = cst->IsFloatConstant() ? temps.AcquireS() : temps.AcquireD();
+ // If the source is a double stack slot or a 64bit constant, a 64bit
+ // type is appropriate. Otherwise the source is a register, and since the
+ // type has not been specified, we choose a 64bit type to force a 64bit
+ // move.
+ type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
}
- MoveConstant(temp, cst);
+ }
+ DCHECK((destination.IsFpuRegister() && IsFPType(type)) ||
+ (destination.IsRegister() && !IsFPType(type)));
+ CPURegister dst = CPURegisterFrom(destination, type);
+ if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
+ DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
+ __ Ldr(dst, StackOperandFrom(source));
+ } else if (source.IsConstant()) {
+ DCHECK(CoherentConstantAndType(source, type));
+ MoveConstant(dst, source.GetConstant());
+ } else {
+ if (destination.IsRegister()) {
+ __ Mov(Register(dst), RegisterFrom(source, type));
+ } else {
+ __ Fmov(FPRegister(dst), FPRegisterFrom(source, type));
+ }
+ }
+
+ } else { // The destination is not a register. It must be a stack slot.
+ DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+ if (source.IsRegister() || source.IsFpuRegister()) {
+ if (unspecified_type) {
+ if (source.IsRegister()) {
+ type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
+ } else {
+ type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
+ }
+ }
+ DCHECK((destination.IsDoubleStackSlot() == Is64BitType(type)) &&
+ (source.IsFpuRegister() == IsFPType(type)));
+ __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination));
+ } else if (source.IsConstant()) {
+ DCHECK(unspecified_type || CoherentConstantAndType(source, type));
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ HConstant* src_cst = source.GetConstant();
+ CPURegister temp;
+ if (src_cst->IsIntConstant()) {
+ temp = temps.AcquireW();
+ } else if (src_cst->IsLongConstant()) {
+ temp = temps.AcquireX();
+ } else if (src_cst->IsFloatConstant()) {
+ temp = temps.AcquireS();
+ } else {
+ DCHECK(src_cst->IsDoubleConstant());
+ temp = temps.AcquireD();
+ }
+ MoveConstant(temp, src_cst);
__ Str(temp, StackOperandFrom(destination));
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+ DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = destination.IsDoubleStackSlot() ? temps.AcquireX() : temps.AcquireW();
+ // There is generally less pressure on FP registers.
+ FPRegister temp = destination.IsDoubleStackSlot() ? temps.AcquireD() : temps.AcquireS();
__ Ldr(temp, StackOperandFrom(source));
__ Str(temp, StackOperandFrom(destination));
}
}
}
+void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
+ DCHECK(!loc1.IsConstant());
+ DCHECK(!loc2.IsConstant());
+
+ if (loc1.Equals(loc2)) {
+ return;
+ }
+
+ UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
+
+ bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
+ bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+ bool is_fp_reg1 = loc1.IsFpuRegister();
+ bool is_fp_reg2 = loc2.IsFpuRegister();
+
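+ // Handle swaps between two core registers, two FP registers, a register and
+ // a stack slot, or two stack slots; other combinations are not supported.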
+ if (loc2.IsRegister() && loc1.IsRegister()) {
+ Register r1 = XRegisterFrom(loc1);
+ Register r2 = XRegisterFrom(loc2);
+ Register tmp = temps.AcquireSameSizeAs(r1);
+ __ Mov(tmp, r2);
+ __ Mov(r2, r1);
+ __ Mov(r1, tmp);
+ } else if (is_fp_reg2 && is_fp_reg1) {
+ FPRegister r1 = DRegisterFrom(loc1);
+ FPRegister r2 = DRegisterFrom(loc2);
+ FPRegister tmp = temps.AcquireSameSizeAs(r1);
+ __ Fmov(tmp, r2);
+ __ Fmov(r2, r1);
+ __ Fmov(r1, tmp);
+ } else if (is_slot1 != is_slot2) {
+ MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2);
+ Location reg_loc = is_slot1 ? loc2 : loc1;
+ CPURegister reg, tmp;
+ if (reg_loc.IsFpuRegister()) {
+ reg = DRegisterFrom(reg_loc);
+ tmp = temps.AcquireD();
+ } else {
+ reg = XRegisterFrom(reg_loc);
+ tmp = temps.AcquireX();
+ }
+ __ Ldr(tmp, mem);
+ __ Str(reg, mem);
+ if (reg_loc.IsFpuRegister()) {
+ __ Fmov(FPRegister(reg), FPRegister(tmp));
+ } else {
+ __ Mov(Register(reg), Register(tmp));
+ }
+ } else if (is_slot1 && is_slot2) {
+ MemOperand mem1 = StackOperandFrom(loc1);
+ MemOperand mem2 = StackOperandFrom(loc2);
+ Register tmp1 = loc1.IsStackSlot() ? temps.AcquireW() : temps.AcquireX();
+ Register tmp2 = temps.AcquireSameSizeAs(tmp1);
+ __ Ldr(tmp1, mem1);
+ __ Ldr(tmp2, mem2);
+ __ Str(tmp1, mem2);
+ __ Str(tmp2, mem1);
+ } else {
+ LOG(FATAL) << "Unimplemented";
+ }
+}
+
void CodeGeneratorARM64::Load(Primitive::Type type,
vixl::CPURegister dst,
const vixl::MemOperand& src) {
@@ -850,7 +1060,7 @@
codegen_(codegen) {}
#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \
- M(ParallelMove) \
+ /* No unimplemented IR. */
#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
@@ -1113,7 +1323,9 @@
}
void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
- BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64();
+ LocationSummary* locations = instruction->GetLocations();
+ BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
+ instruction, locations->InAt(0), locations->InAt(1));
codegen_->AddSlowPath(slow_path);
__ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
@@ -1125,22 +1337,24 @@
instruction, LocationSummary::kCallOnSlowPath);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ LocationSummary* locations = instruction->GetLocations();
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
- Register temp = temps.AcquireW();
+ Register obj_cls = WRegisterFrom(locations->GetTemp(0));
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
+ instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
// TODO: avoid this check if we know obj is not null.
__ Cbz(obj, slow_path->GetExitLabel());
// Compare the class of `obj` with `cls`.
- __ Ldr(temp, HeapOperand(obj, mirror::Object::ClassOffset()));
- __ Cmp(temp, cls);
+ __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset()));
+ __ Cmp(obj_cls, cls);
__ B(ne, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -1316,12 +1530,20 @@
codegen_->AddSlowPath(slow_path);
Location value = instruction->GetLocations()->InAt(0);
+ Primitive::Type type = instruction->GetType();
+
+ if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
+ LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+ return;
+ }
+
if (value.IsConstant()) {
int64_t divisor = Int64ConstantFrom(value);
if (divisor == 0) {
__ B(slow_path->GetEntryLabel());
} else {
- LOG(FATAL) << "Divisions by non-null constants should have been optimized away.";
+ // A division by a non-zero constant is valid. We don't need to perform
+ // any check, so simply fall through.
}
} else {
__ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
@@ -1496,7 +1718,8 @@
// If the classes are not equal, we go into a slow path.
DCHECK(locations->OnlyCallsOnSlowPath());
SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+ new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
+ instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
@@ -1914,6 +2137,14 @@
HandleBinaryOp(instruction);
}
+void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
@@ -1989,7 +2220,7 @@
void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
UNUSED(instruction);
codegen_->GenerateFrameExit();
- __ Br(lr);
+ __ Ret();
}
void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -1999,7 +2230,7 @@
void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
UNUSED(instruction);
codegen_->GenerateFrameExit();
- __ Br(lr);
+ __ Ret();
}
void LocationsBuilderARM64::VisitShl(HShl* shl) {
@@ -2157,17 +2388,18 @@
if (IsIntegralType(result_type) && IsIntegralType(input_type)) {
int result_size = Primitive::ComponentSize(result_type);
int input_size = Primitive::ComponentSize(input_type);
- int min_size = kBitsPerByte * std::min(result_size, input_size);
+ int min_size = std::min(result_size, input_size);
Register output = OutputRegister(conversion);
Register source = InputRegisterAt(conversion, 0);
- if ((result_type == Primitive::kPrimChar) ||
- ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
- __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
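+ // A widening conversion to char zero-extends the low 16 bits of the source.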
+ if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
+ __ Ubfx(output, source, 0, result_size * kBitsPerByte);
+ } else if ((result_type == Primitive::kPrimChar) ||
+ ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
+ __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
} else {
- __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
+ __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
}
} else if (IsFPType(result_type) && IsIntegralType(input_type)) {
- CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong);
__ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
} else if (IsIntegralType(result_type) && IsFPType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e3d25f..1d5bfb7 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -139,6 +139,27 @@
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
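+// Resolves HParallelMove instructions on arm64 by delegating to the code
+// generator's MoveLocation() and SwapLocations() helpers.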
+class ParallelMoveResolverARM64 : public ParallelMoveResolver {
+ public:
+ ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
+ : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+ void EmitMove(size_t index) OVERRIDE;
+ void EmitSwap(size_t index) OVERRIDE;
+ void RestoreScratch(int reg) OVERRIDE;
+ void SpillScratch(int reg) OVERRIDE;
+
+ private:
+ Arm64Assembler* GetAssembler() const;
+ vixl::MacroAssembler* GetVIXLAssembler() const {
+ return GetAssembler()->vixl_masm_;
+ }
+
+ CodeGeneratorARM64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
+};
+
class CodeGeneratorARM64 : public CodeGenerator {
public:
explicit CodeGeneratorARM64(HGraph* graph);
@@ -193,19 +214,10 @@
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index);
- UNUSED(reg_id);
- LOG(INFO) << "CodeGeneratorARM64::SaveCoreRegister()";
- return kArm64WordSize;
- }
-
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index);
- UNUSED(reg_id);
- LOG(INFO) << "CodeGeneratorARM64::RestoreCoreRegister()";
- return kArm64WordSize;
- }
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
// The number of registers that can be allocated. The register allocator may
// decide to reserve and not use a few of them.
@@ -237,7 +249,11 @@
// Code generation helpers.
void MoveConstant(vixl::CPURegister destination, HConstant* constant);
- void MoveHelper(Location destination, Location source, Primitive::Type type);
+ // The type is optional. When specified, it must be coherent with the
+ // locations, and is used for optimization and debugging.
+ void MoveLocation(Location destination, Location source,
+ Primitive::Type type = Primitive::kPrimVoid);
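+ // Swaps the contents of two locations. Constants are not supported; both
+ // locations must be registers or stack slots.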
+ void SwapLocations(Location loc1, Location loc2);
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
@@ -245,10 +261,7 @@
// Generate code to invoke a runtime entry point.
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
- ParallelMoveResolver* GetMoveResolver() OVERRIDE {
- UNIMPLEMENTED(INFO) << "TODO: MoveResolver";
- return nullptr;
- }
+ ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
private:
// Labels for each block that will be compiled.
@@ -256,11 +269,16 @@
LocationsBuilderARM64 location_builder_;
InstructionCodeGeneratorARM64 instruction_visitor_;
+ ParallelMoveResolverARM64 move_resolver_;
Arm64Assembler assembler_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
+inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index fee3ea6..8b75cc7 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -129,12 +129,15 @@
std::function<void(HGraph*)> hook_before_codegen,
bool has_result,
Expected expected) {
- if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph);
- RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
+ if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
arm::CodeGeneratorARM codegenARM(graph);
RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+ } else if (kRuntimeISA == kArm64) {
+ arm64::CodeGeneratorARM64 codegenARM64(graph);
+ RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+ } else if (kRuntimeISA == kX86) {
+ x86::CodeGeneratorX86 codegenX86(graph);
+ RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
x86_64::CodeGeneratorX86_64 codegenX86_64(graph);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
@@ -362,11 +365,7 @@
#undef NOT_LONG_TEST
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_IntToLongOfLongToInt) {
-#else
TEST(CodegenTest, IntToLongOfLongToInt) {
-#endif
const int64_t input = INT64_C(4294967296); // 2^32
const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW.
const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -493,10 +492,8 @@
TestCode(data, true, 12); \
}
-#if !defined(__aarch64__)
MUL_TEST(INT, MulInt);
MUL_TEST(LONG, MulLong);
-#endif
TEST(CodegenTest, ReturnMulIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -633,11 +630,7 @@
}
}
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_ReturnDivIntLit8) {
-#else
TEST(CodegenTest, ReturnDivIntLit8) {
-#endif
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::DIV_INT_LIT8, 3 << 8 | 0,
@@ -646,11 +639,7 @@
TestCode(data, true, 1);
}
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_ReturnDivInt2Addr) {
-#else
TEST(CodegenTest, ReturnDivInt2Addr) {
-#endif
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0,
Instruction::CONST_4 | 2 << 12 | 1 << 8,
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a6c0635..c1c805d 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -64,15 +64,17 @@
if (!Supports(instruction_set)) {
return false;
}
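+ // The arm64 and x86_64 backends support register allocation for all
+ // primitive types, so there is no need to scan the graph for long or
+ // floating point instructions.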
+ if (instruction_set == kArm64 || instruction_set == kX86_64) {
+ return true;
+ }
for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions());
!it.Done();
it.Advance()) {
HInstruction* current = it.Current();
- if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
- if ((current->GetType() == Primitive::kPrimFloat
- || current->GetType() == Primitive::kPrimDouble)
- && instruction_set != kX86_64) {
+ if (current->GetType() == Primitive::kPrimLong ||
+ current->GetType() == Primitive::kPrimFloat ||
+ current->GetType() == Primitive::kPrimDouble) {
return false;
}
}
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 976ee39..cbe741c 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -67,10 +67,11 @@
static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
static bool Supports(InstructionSet instruction_set) {
- return instruction_set == kX86
- || instruction_set == kArm
- || instruction_set == kX86_64
- || instruction_set == kThumb2;
+ return instruction_set == kArm
+ || instruction_set == kArm64
+ || instruction_set == kThumb2
+ || instruction_set == kX86
+ || instruction_set == kX86_64;
}
size_t GetNumberOfSpillSlots() const {