Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 342 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 54 |
| -rw-r--r-- | compiler/optimizing/codegen_test.cc | 25 |
| -rw-r--r-- | compiler/optimizing/register_allocator.cc | 10 |
| -rw-r--r-- | compiler/optimizing/register_allocator.h | 9 |
5 files changed, 341 insertions, 99 deletions
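The code_generator_arm64.cc changes below replace MoveHelper with MoveLocation, which infers the width and register file of a move when no Primitive type is supplied. The following is a rough, standalone C++ illustration of that inference rule only; the enums and the names Kind, Type and InferMoveType are simplified stand-ins, not ART's Location or Primitive::Type API.

// Minimal sketch of the "unspecified type" inference used by MoveLocation
// in the diff below (simplified model, not ART's classes).
#include <cassert>
#include <iostream>

enum class Kind { kCoreRegister, kFpuRegister, kStackSlot, kDoubleStackSlot, kConstant32, kConstant64 };
enum class Type { kInt, kLong, kFloat, kDouble };

Type InferMoveType(Kind destination, Kind source) {
  if (destination == Kind::kCoreRegister || destination == Kind::kFpuRegister) {
    bool core = (destination == Kind::kCoreRegister);
    // 32-bit sources (a single stack slot or a 32-bit constant) get the
    // 32-bit type for the destination's register file.
    if (source == Kind::kStackSlot || source == Kind::kConstant32) {
      return core ? Type::kInt : Type::kFloat;
    }
    // Anything else (double stack slot, 64-bit constant, register source)
    // defaults to a 64-bit type so the full register is moved.
    return core ? Type::kLong : Type::kDouble;
  }
  // Destination is a stack slot: the width comes from the slot, the register
  // file from the source register.
  assert(destination == Kind::kStackSlot || destination == Kind::kDoubleStackSlot);
  bool single_slot = (destination == Kind::kStackSlot);
  if (source == Kind::kCoreRegister) return single_slot ? Type::kInt : Type::kLong;
  if (source == Kind::kFpuRegister) return single_slot ? Type::kFloat : Type::kDouble;
  // Slot-to-slot and constant-to-slot moves go through a scratch register in
  // the real code; the width here simply follows the destination slot.
  return single_slot ? Type::kInt : Type::kLong;
}

int main() {
  std::cout << static_cast<int>(InferMoveType(Kind::kCoreRegister, Kind::kStackSlot)) << "\n";      // kInt
  std::cout << static_cast<int>(InferMoveType(Kind::kFpuRegister, Kind::kDoubleStackSlot)) << "\n"; // kDouble
  return 0;
}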
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index a61ef2d4f6..b048c07b4c 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -90,10 +90,12 @@ int ARTRegCodeFromVIXL(int code) { } Register XRegisterFrom(Location location) { + DCHECK(location.IsRegister()); return Register::XRegFromCode(VIXLRegCodeFromART(location.reg())); } Register WRegisterFrom(Location location) { + DCHECK(location.IsRegister()); return Register::WRegFromCode(VIXLRegCodeFromART(location.reg())); } @@ -112,10 +114,12 @@ Register InputRegisterAt(HInstruction* instr, int input_index) { } FPRegister DRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()); return FPRegister::DRegFromCode(location.reg()); } FPRegister SRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()); return FPRegister::SRegFromCode(location.reg()); } @@ -133,6 +137,11 @@ FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) { instr->InputAt(input_index)->GetType()); } +CPURegister CPURegisterFrom(Location location, Primitive::Type type) { + return IsFPType(type) ? CPURegister(FPRegisterFrom(location, type)) + : CPURegister(RegisterFrom(location, type)); +} + CPURegister OutputCPURegister(HInstruction* instr) { return IsFPType(instr->GetType()) ? static_cast<CPURegister>(OutputFPRegister(instr)) : static_cast<CPURegister>(OutputRegister(instr)); @@ -266,14 +275,32 @@ class SlowPathCodeARM64 : public SlowPathCode { class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - BoundsCheckSlowPathARM64() {} + BoundsCheckSlowPathARM64(HBoundsCheck* instruction, + Location index_location, + Location length_location) + : instruction_(instruction), + index_location_(index_location), + length_location_(length_location) {} + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - __ Brk(__LINE__); // TODO: Unimplemented BoundsCheckSlowPathARM64. + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. 
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves( + index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), + length_location_, LocationFrom(calling_convention.GetRegisterAt(1))); + arm64_codegen->InvokeRuntime( + QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc()); } private: + HBoundsCheck* const instruction_; + const Location index_location_; + const Location length_location_; + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); }; @@ -322,7 +349,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = at_->GetType(); - arm64_codegen->MoveHelper(out, calling_convention.GetReturnLocation(type), type); + arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); } codegen->RestoreLiveRegisters(locations); @@ -364,7 +391,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc()); Primitive::Type type = instruction_->GetType(); - arm64_codegen->MoveHelper(locations->Out(), calling_convention.GetReturnLocation(type), type); + arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); codegen->RestoreLiveRegisters(locations); __ B(GetExitLabel()); @@ -445,15 +472,51 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - TypeCheckSlowPathARM64() {} + TypeCheckSlowPathARM64(HInstruction* instruction, + Location class_to_check, + Location object_class, + uint32_t dex_pc) + : instruction_(instruction), + class_to_check_(class_to_check), + object_class_(object_class), + dex_pc_(dex_pc) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(instruction_->IsCheckCast() + || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + __ Bind(GetEntryLabel()); - __ Brk(__LINE__); // TODO: Unimplemented TypeCheckSlowPathARM64. + codegen->SaveLiveRegisters(locations); + + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. 
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves( + class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), + object_class_, LocationFrom(calling_convention.GetRegisterAt(1))); + + if (instruction_->IsInstanceOf()) { + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_); + Primitive::Type ret_type = instruction_->GetType(); + Location ret_loc = calling_convention.GetReturnLocation(ret_type); + arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + } else { + DCHECK(instruction_->IsCheckCast()); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_); + } + + codegen->RestoreLiveRegisters(locations); __ B(GetExitLabel()); } private: + HInstruction* const instruction_; + const Location class_to_check_; + const Location object_class_; + uint32_t dex_pc_; + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; @@ -487,7 +550,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph) kNumberOfAllocatableRegisterPairs), block_labels_(nullptr), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} #undef __ #define __ GetVIXLAssembler()-> @@ -498,6 +562,24 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } +void ParallelMoveResolverARM64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->MoveLocation(move->GetDestination(), move->GetSource()); +} + +void ParallelMoveResolverARM64::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + codegen_->SwapLocations(move->GetDestination(), move->GetSource()); +} + +void ParallelMoveResolverARM64::RestoreScratch(int reg) { + __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize)); +} + +void ParallelMoveResolverARM64::SpillScratch(int reg) { + __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize)); +} + void CodeGeneratorARM64::GenerateFrameEntry() { bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod(); if (do_overflow_check) { @@ -571,18 +653,18 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, } } else if (instruction->IsTemporary()) { Location temp_location = GetTemporaryLocation(instruction->AsTemporary()); - MoveHelper(location, temp_location, type); + MoveLocation(location, temp_location, type); } else if (instruction->IsLoadLocal()) { uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal()); if (Is64BitType(type)) { - MoveHelper(location, Location::DoubleStackSlot(stack_slot), type); + MoveLocation(location, Location::DoubleStackSlot(stack_slot), type); } else { - MoveHelper(location, Location::StackSlot(stack_slot), type); + MoveLocation(location, Location::StackSlot(stack_slot), type); } } else { DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary()); - MoveHelper(location, locations->Out(), type); + MoveLocation(location, locations->Out(), type); } } @@ -665,6 +747,30 @@ Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { } } +size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); + __ Str(reg, MemOperand(sp, stack_index)); + return kArm64WordSize; +} + +size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); + __ Ldr(reg, 
MemOperand(sp, stack_index)); + return kArm64WordSize; +} + +size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + FPRegister reg = FPRegister(reg_id, kDRegSize); + __ Str(reg, MemOperand(sp, stack_index)); + return kArm64WordSize; +} + +size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + FPRegister reg = FPRegister(reg_id, kDRegSize); + __ Ldr(reg, MemOperand(sp, stack_index)); + return kArm64WordSize; +} + void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { stream << Arm64ManagedRegister::FromXRegister(XRegister(reg)); } @@ -686,58 +792,162 @@ void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* consta } } -void CodeGeneratorARM64::MoveHelper(Location destination, - Location source, - Primitive::Type type) { + +static bool CoherentConstantAndType(Location constant, Primitive::Type type) { + DCHECK(constant.IsConstant()); + HConstant* cst = constant.GetConstant(); + return (cst->IsIntConstant() && type == Primitive::kPrimInt) || + (cst->IsLongConstant() && type == Primitive::kPrimLong) || + (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || + (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); +} + +void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Primitive::Type type) { if (source.Equals(destination)) { return; } - if (destination.IsRegister()) { - Register dst = RegisterFrom(destination, type); - if (source.IsStackSlot() || source.IsDoubleStackSlot()) { - DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); - __ Ldr(dst, StackOperandFrom(source)); - } else { - __ Mov(dst, OperandFrom(source, type)); + + // A valid move can always be inferred from the destination and source + // locations. When moving from and to a register, the argument type can be + // used to generate 32bit instead of 64bit moves. In debug mode we also + // checks the coherency of the locations and the type. + bool unspecified_type = (type == Primitive::kPrimVoid); + + if (destination.IsRegister() || destination.IsFpuRegister()) { + if (unspecified_type) { + HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; + if (source.IsStackSlot() || + (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) { + // For stack slots and 32bit constants, a 64bit type is appropriate. + type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + } else { + // If the source is a double stack slot or a 64bit constant, a 64bit + // type is appropriate. Else the source is a register, and since the + // type has not been specified, we chose a 64bit type to force a 64bit + // move. + type = destination.IsRegister() ? 
Primitive::kPrimLong : Primitive::kPrimDouble; + } } - } else if (destination.IsFpuRegister()) { - FPRegister dst = FPRegisterFrom(destination, type); + DCHECK((destination.IsFpuRegister() && IsFPType(type)) || + (destination.IsRegister() && !IsFPType(type))); + CPURegister dst = CPURegisterFrom(destination, type); if (source.IsStackSlot() || source.IsDoubleStackSlot()) { DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); __ Ldr(dst, StackOperandFrom(source)); - } else if (source.IsFpuRegister()) { - __ Fmov(dst, FPRegisterFrom(source, type)); - } else { + } else if (source.IsConstant()) { + DCHECK(CoherentConstantAndType(source, type)); MoveConstant(dst, source.GetConstant()); + } else { + if (destination.IsRegister()) { + __ Mov(Register(dst), RegisterFrom(source, type)); + } else { + __ Fmov(FPRegister(dst), FPRegisterFrom(source, type)); + } } - } else { + + } else { // The destination is not a register. It must be a stack slot. DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); - if (source.IsRegister()) { - __ Str(RegisterFrom(source, type), StackOperandFrom(destination)); - } else if (source.IsFpuRegister()) { - __ Str(FPRegisterFrom(source, type), StackOperandFrom(destination)); + if (source.IsRegister() || source.IsFpuRegister()) { + if (unspecified_type) { + if (source.IsRegister()) { + type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + } else { + type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + } + } + DCHECK((destination.IsDoubleStackSlot() == Is64BitType(type)) && + (source.IsFpuRegister() == IsFPType(type))); + __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination)); } else if (source.IsConstant()) { + DCHECK(unspecified_type || CoherentConstantAndType(source, type)); UseScratchRegisterScope temps(GetVIXLAssembler()); - HConstant* cst = source.GetConstant(); + HConstant* src_cst = source.GetConstant(); CPURegister temp; - if (cst->IsIntConstant() || cst->IsLongConstant()) { - temp = cst->IsIntConstant() ? temps.AcquireW() : temps.AcquireX(); + if (src_cst->IsIntConstant()) { + temp = temps.AcquireW(); + } else if (src_cst->IsLongConstant()) { + temp = temps.AcquireX(); + } else if (src_cst->IsFloatConstant()) { + temp = temps.AcquireS(); } else { - DCHECK(cst->IsFloatConstant() || cst->IsDoubleConstant()); - temp = cst->IsFloatConstant() ? temps.AcquireS() : temps.AcquireD(); + DCHECK(src_cst->IsDoubleConstant()); + temp = temps.AcquireD(); } - MoveConstant(temp, cst); + MoveConstant(temp, src_cst); __ Str(temp, StackOperandFrom(destination)); } else { DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); + DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = destination.IsDoubleStackSlot() ? temps.AcquireX() : temps.AcquireW(); + // There is generally less pressure on FP registers. + FPRegister temp = destination.IsDoubleStackSlot() ? 
temps.AcquireD() : temps.AcquireS(); __ Ldr(temp, StackOperandFrom(source)); __ Str(temp, StackOperandFrom(destination)); } } } +void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) { + DCHECK(!loc1.IsConstant()); + DCHECK(!loc2.IsConstant()); + + if (loc1.Equals(loc2)) { + return; + } + + UseScratchRegisterScope temps(GetAssembler()->vixl_masm_); + + bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); + bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_fp_reg1 = loc1.IsFpuRegister(); + bool is_fp_reg2 = loc2.IsFpuRegister(); + + if (loc2.IsRegister() && loc1.IsRegister()) { + Register r1 = XRegisterFrom(loc1); + Register r2 = XRegisterFrom(loc2); + Register tmp = temps.AcquireSameSizeAs(r1); + __ Mov(tmp, r2); + __ Mov(r2, r1); + __ Mov(r1, tmp); + } else if (is_fp_reg2 && is_fp_reg1) { + FPRegister r1 = DRegisterFrom(loc1); + FPRegister r2 = DRegisterFrom(loc2); + FPRegister tmp = temps.AcquireSameSizeAs(r1); + __ Fmov(tmp, r2); + __ Fmov(r2, r1); + __ Fmov(r1, tmp); + } else if (is_slot1 != is_slot2) { + MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2); + Location reg_loc = is_slot1 ? loc2 : loc1; + CPURegister reg, tmp; + if (reg_loc.IsFpuRegister()) { + reg = DRegisterFrom(reg_loc); + tmp = temps.AcquireD(); + } else { + reg = XRegisterFrom(reg_loc); + tmp = temps.AcquireX(); + } + __ Ldr(tmp, mem); + __ Str(reg, mem); + if (reg_loc.IsFpuRegister()) { + __ Fmov(FPRegister(reg), FPRegister(tmp)); + } else { + __ Mov(Register(reg), Register(tmp)); + } + } else if (is_slot1 && is_slot2) { + MemOperand mem1 = StackOperandFrom(loc1); + MemOperand mem2 = StackOperandFrom(loc2); + Register tmp1 = loc1.IsStackSlot() ? temps.AcquireW() : temps.AcquireX(); + Register tmp2 = temps.AcquireSameSizeAs(tmp1); + __ Ldr(tmp1, mem1); + __ Ldr(tmp2, mem2); + __ Str(tmp1, mem2); + __ Str(tmp2, mem1); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + void CodeGeneratorARM64::Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src) { @@ -850,7 +1060,7 @@ InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, codegen_(codegen) {} #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ - M(ParallelMove) \ + /* No unimplemented IR. 
*/ #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode @@ -1113,7 +1323,9 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { } void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { - BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(); + LocationSummary* locations = instruction->GetLocations(); + BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64( + instruction, locations->InAt(0), locations->InAt(1)); codegen_->AddSlowPath(slow_path); __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); @@ -1125,22 +1337,24 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { instruction, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { - UseScratchRegisterScope temps(GetVIXLAssembler()); + LocationSummary* locations = instruction->GetLocations(); Register obj = InputRegisterAt(instruction, 0);; Register cls = InputRegisterAt(instruction, 1);; - Register temp = temps.AcquireW(); + Register obj_cls = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( + instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); // TODO: avoid this check if we know obj is not null. __ Cbz(obj, slow_path->GetExitLabel()); // Compare the class of `obj` with `cls`. - __ Ldr(temp, HeapOperand(obj, mirror::Object::ClassOffset())); - __ Cmp(temp, cls); + __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset())); + __ Cmp(obj_cls, cls); __ B(ne, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -1316,12 +1530,20 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction codegen_->AddSlowPath(slow_path); Location value = instruction->GetLocations()->InAt(0); + Primitive::Type type = instruction->GetType(); + + if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) { + LOG(FATAL) << "Unexpected type " << type << "for DivZeroCheck."; + return; + } + if (value.IsConstant()) { int64_t divisor = Int64ConstantFrom(value); if (divisor == 0) { __ B(slow_path->GetEntryLabel()); } else { - LOG(FATAL) << "Divisions by non-null constants should have been optimized away."; + // A division by a non-null constant is valid. We don't need to perform + // any check, so simply fall through. } } else { __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); @@ -1496,7 +1718,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // If the classes are not equal, we go into a slow path. 
DCHECK(locations->OnlyCallsOnSlowPath()); SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(); + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( + instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -1914,6 +2137,14 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); @@ -1989,7 +2220,7 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) { UNUSED(instruction); codegen_->GenerateFrameExit(); - __ Br(lr); + __ Ret(); } void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { @@ -1999,7 +2230,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { UNUSED(instruction); codegen_->GenerateFrameExit(); - __ Br(lr); + __ Ret(); } void LocationsBuilderARM64::VisitShl(HShl* shl) { @@ -2157,17 +2388,18 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers if (IsIntegralType(result_type) && IsIntegralType(input_type)) { int result_size = Primitive::ComponentSize(result_type); int input_size = Primitive::ComponentSize(input_type); - int min_size = kBitsPerByte * std::min(result_size, input_size); + int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size); + if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { + __ Ubfx(output, source, 0, result_size * kBitsPerByte); + } else if ((result_type == Primitive::kPrimChar) || + ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { + __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } else { - __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size); + __ Sbfx(output, output.IsX() ? 
source.X() : source.W(), 0, min_size * kBitsPerByte); } } else if (IsFPType(result_type) && IsIntegralType(input_type)) { - CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong); __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0)); } else if (IsIntegralType(result_type) && IsFPType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0e3d25f9aa..1d5bfb734e 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -139,6 +139,27 @@ class LocationsBuilderARM64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; +class ParallelMoveResolverARM64 : public ParallelMoveResolver { + public: + ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) OVERRIDE; + void EmitSwap(size_t index) OVERRIDE; + void RestoreScratch(int reg) OVERRIDE; + void SpillScratch(int reg) OVERRIDE; + + private: + Arm64Assembler* GetAssembler() const; + vixl::MacroAssembler* GetVIXLAssembler() const { + return GetAssembler()->vixl_masm_; + } + + CodeGeneratorARM64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64); +}; + class CodeGeneratorARM64 : public CodeGenerator { public: explicit CodeGeneratorARM64(HGraph* graph); @@ -193,19 +214,10 @@ class CodeGeneratorARM64 : public CodeGenerator { Location GetStackLocation(HLoadLocal* load) const OVERRIDE; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index); - UNUSED(reg_id); - LOG(INFO) << "CodeGeneratorARM64::SaveCoreRegister()"; - return kArm64WordSize; - } - - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index); - UNUSED(reg_id); - LOG(INFO) << "CodeGeneratorARM64::RestoreCoreRegister()"; - return kArm64WordSize; - } + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id); + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); // The number of registers that can be allocated. The register allocator may // decide to reserve and not use a few of them. @@ -237,7 +249,11 @@ class CodeGeneratorARM64 : public CodeGenerator { // Code generation helpers. void MoveConstant(vixl::CPURegister destination, HConstant* constant); - void MoveHelper(Location destination, Location source, Primitive::Type type); + // The type is optional. When specified it must be coherent with the + // locations, and is used for optimisation and debugging. + void MoveLocation(Location destination, Location source, + Primitive::Type type = Primitive::kPrimVoid); + void SwapLocations(Location loc_1, Location loc_2); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadCurrentMethod(vixl::Register current_method); @@ -245,10 +261,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Generate code to invoke a runtime entry point. 
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc); - ParallelMoveResolver* GetMoveResolver() OVERRIDE { - UNIMPLEMENTED(INFO) << "TODO: MoveResolver"; - return nullptr; - } + ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; } private: // Labels for each block that will be compiled. @@ -256,11 +269,16 @@ class CodeGeneratorARM64 : public CodeGenerator { LocationsBuilderARM64 location_builder_; InstructionCodeGeneratorARM64 instruction_visitor_; + ParallelMoveResolverARM64 move_resolver_; Arm64Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; +inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const { + return codegen_->GetAssembler(); +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index fee3ea6f8c..8b75cc7c65 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -129,12 +129,15 @@ static void RunCodeOptimized(HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { - if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph); - RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { + if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { arm::CodeGeneratorARM codegenARM(graph); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); + } else if (kRuntimeISA == kArm64) { + arm64::CodeGeneratorARM64 codegenARM64(graph); + RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); + } else if (kRuntimeISA == kX86) { + x86::CodeGeneratorX86 codegenX86(graph); + RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { x86_64::CodeGeneratorX86_64 codegenX86_64(graph); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); @@ -362,11 +365,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX, #undef NOT_LONG_TEST -#if defined(__aarch64__) -TEST(CodegenTest, DISABLED_IntToLongOfLongToInt) { -#else TEST(CodegenTest, IntToLongOfLongToInt) { -#endif const int64_t input = INT64_C(4294967296); // 2^32 const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. 
const uint16_t word1 = High16Bits(Low32Bits(input)); @@ -493,10 +492,8 @@ TEST(CodegenTest, NonMaterializedCondition) { TestCode(data, true, 12); \ } -#if !defined(__aarch64__) MUL_TEST(INT, MulInt); MUL_TEST(LONG, MulLong); -#endif TEST(CodegenTest, ReturnMulIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( @@ -633,11 +630,7 @@ TEST(CodegenTest, MaterializedCondition2) { } } -#if defined(__aarch64__) -TEST(CodegenTest, DISABLED_ReturnDivIntLit8) { -#else TEST(CodegenTest, ReturnDivIntLit8) { -#endif const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, @@ -646,11 +639,7 @@ TEST(CodegenTest, ReturnDivIntLit8) { TestCode(data, true, 1); } -#if defined(__aarch64__) -TEST(CodegenTest, DISABLED_ReturnDivInt2Addr) { -#else TEST(CodegenTest, ReturnDivInt2Addr) { -#endif const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a6c06359a0..c1c805dc56 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -64,15 +64,17 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, if (!Supports(instruction_set)) { return false; } + if (instruction_set == kArm64 || instruction_set == kX86_64) { + return true; + } for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; - if ((current->GetType() == Primitive::kPrimFloat - || current->GetType() == Primitive::kPrimDouble) - && instruction_set != kX86_64) { + if (current->GetType() == Primitive::kPrimLong || + current->GetType() == Primitive::kPrimFloat || + current->GetType() == Primitive::kPrimDouble) { return false; } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 976ee39ca8..cbe741c2b3 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -67,10 +67,11 @@ class RegisterAllocator { static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); static bool Supports(InstructionSet instruction_set) { - return instruction_set == kX86 - || instruction_set == kArm - || instruction_set == kX86_64 - || instruction_set == kThumb2; + return instruction_set == kArm + || instruction_set == kArm64 + || instruction_set == kThumb2 + || instruction_set == kX86 + || instruction_set == kX86_64; } size_t GetNumberOfSpillSlots() const { |
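A note on the integral VisitTypeConversion hunk above: the new Ubfx/Sbfx selection zero-extends when the result type is char or when widening from char, and sign-extends everything else, over min(result, input) bits. Below is a rough standalone C++ sketch of that rule using plain arithmetic instead of bitfield-extract instructions; the enum T and the helpers SizeOf and Convert are illustrative stand-ins, not ART types, and the diff's dedicated byte-to-char case is not modelled.

// Standalone sketch of the integral conversion rule from the
// VisitTypeConversion hunk above, in plain C++ (simplified stand-ins).
#include <algorithm>
#include <cstdint>
#include <cstdio>

enum class T { kChar, kByte, kShort, kInt, kLong };

int SizeOf(T t) {
  switch (t) {
    case T::kByte: return 1;
    case T::kChar: case T::kShort: return 2;
    case T::kInt: return 4;
    case T::kLong: return 8;
  }
  return 0;
}

int64_t Convert(T result_type, T input_type, int64_t value) {
  int result_size = SizeOf(result_type);
  int input_size = SizeOf(input_type);
  int min_bits = 8 * std::min(result_size, input_size);
  uint64_t mask = (min_bits == 64) ? ~uint64_t{0} : ((uint64_t{1} << min_bits) - 1);
  if (result_type == T::kChar || (input_type == T::kChar && result_size > input_size)) {
    // Unsigned extract: keep min_bits low bits, zero the rest (Ubfx analogue).
    return static_cast<int64_t>(static_cast<uint64_t>(value) & mask);
  }
  // Signed extract: keep min_bits low bits, then sign-extend (Sbfx analogue).
  uint64_t bits = static_cast<uint64_t>(value) & mask;
  uint64_t sign = uint64_t{1} << (min_bits - 1);
  return static_cast<int64_t>((bits ^ sign) - sign);
}

int main() {
  std::printf("%lld\n", (long long)Convert(T::kChar, T::kInt, -1));      // 65535, zero-extended
  std::printf("%lld\n", (long long)Convert(T::kShort, T::kInt, 0xFFFF)); // -1, sign-extended
  return 0;
}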