Diffstat (limited to 'compiler'): 29 files changed, 3107 insertions, 1741 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 16a158cf6f..46b7e5d71b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -98,6 +98,7 @@ LIBART_COMPILER_SRC_FILES_arm := \
   utils/arm/assembler_arm.cc \
   utils/arm/assembler_arm32.cc \
   utils/arm/assembler_thumb2.cc \
+  utils/arm/jni_macro_assembler_arm.cc \
   utils/arm/managed_register_arm.cc \
 
 # TODO We should really separate out those files that are actually needed for both variants of an
@@ -114,6 +115,7 @@ LIBART_COMPILER_SRC_FILES_arm64 := \
   optimizing/instruction_simplifier_shared.cc \
   optimizing/intrinsics_arm64.cc \
   utils/arm64/assembler_arm64.cc \
+  utils/arm64/jni_macro_assembler_arm64.cc \
   utils/arm64/managed_register_arm64.cc \
 
 LIBART_COMPILER_SRC_FILES_mips := \
@@ -285,9 +287,9 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT
   # Vixl assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLESTATIC_LIBRARIES += libvixl-arm64
+      LOCAL_WHOLESTATIC_LIBRARIES += libvixld-arm64
     else
-      LOCAL_SHARED_LIBRARIES += libvixl-arm64
+      LOCAL_SHARED_LIBRARIES += libvixld-arm64
     endif
   else
     ifeq ($$(art_static_or_shared), static)
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index e2235345b7..86f91c5ac4 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -170,7 +170,7 @@ TEST_F(ExceptionTest, StackTraceElement) {
   Runtime* r = Runtime::Current();
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
-  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAll);
+  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves);
   QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
 
   ASSERT_EQ(kStackAlignment, 16U);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 7c87a60084..efae4d0583 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1373,11 +1373,12 @@ void ImageWriter::CalculateNewObjectOffsets() {
   image_methods_[ImageHeader::kResolutionMethod] = runtime->GetResolutionMethod();
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
   image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod();
-  image_methods_[ImageHeader::kCalleeSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kSaveAll);
-  image_methods_[ImageHeader::kRefsOnlySaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+  image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves);
+  image_methods_[ImageHeader::kSaveRefsOnlyMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly);
+  image_methods_[ImageHeader::kSaveRefsAndArgsMethod] =
+      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
   image_methods_[ImageHeader::kSaveEverythingMethod] =
       runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
   // Visit image methods first to have the main runtime methods in the first image.
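Note on the rename above: the Runtime callee-save kinds used throughout this change switch from kSaveAll / kRefsOnly / kRefsAndArgs to more descriptive names. The sketch below is only an orientation aid and is not part of this diff; the enumerator names are taken from the hunks above, while the enum shape and comments are assumed.

// Orientation sketch (assumed declaration, not from this change).
enum CalleeSaveType {
  kSaveAllCalleeSaves,  // previously kSaveAll
  kSaveRefsOnly,        // previously kRefsOnly
  kSaveRefsAndArgs,     // previously kRefsAndArgs
  kSaveEverything,      // saves all registers, including caller-saves
};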
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 5eaf11e9fb..ab85c12a1d 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2531,7 +2531,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2568,13 +2568,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { break; case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ adds(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ adc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, value); + } else { + DCHECK(second.IsRegisterPair()); + __ adds(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ adc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -2608,7 +2613,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2644,13 +2649,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ subs(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ sbc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, -value); + } else { + DCHECK(second.IsRegisterPair()); + __ subs(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ sbc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -4052,31 +4062,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); if (Primitive::Is64BitType(input_cst->GetType())) { - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) && - CanEncodeConstantAsImmediate(High32Bits(value), opcode); + Opcode high_opcode = opcode; + SetCc low_set_cc = kCcDontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. 
+ value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); + } + high_opcode = ADC; + low_set_cc = kCcSet; + break; + default: + break; + } + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && + CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); } else { return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); } } -bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) { +bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, + Opcode opcode, + SetCc set_cc) { ShifterOperand so; ArmAssembler* assembler = codegen_->GetAssembler(); - if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) { + if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) { return true; } Opcode neg_opcode = kNoOperand; switch (opcode) { - case AND: - neg_opcode = BIC; - break; - case ORR: - neg_opcode = ORN; - break; + case AND: neg_opcode = BIC; value = ~value; break; + case ORR: neg_opcode = ORN; value = ~value; break; + case ADD: neg_opcode = SUB; value = -value; break; + case ADC: neg_opcode = SBC; value = ~value; break; + case SUB: neg_opcode = ADD; value = -value; break; + case SBC: neg_opcode = ADC; value = ~value; break; default: return false; } - return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so); + return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so); } void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, @@ -6202,6 +6232,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, __ eor(out, first, ShifterOperand(value)); } +void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out, + Location first, + uint64_t value) { + Register out_low = out.AsRegisterPairLow<Register>(); + Register out_high = out.AsRegisterPairHigh<Register>(); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); + uint32_t value_low = Low32Bits(value); + uint32_t value_high = High32Bits(value); + if (value_low == 0u) { + if (out_low != first_low) { + __ mov(out_low, ShifterOperand(first_low)); + } + __ AddConstant(out_high, first_high, value_high); + return; + } + __ AddConstantSetFlags(out_low, first_low, value_low); + ShifterOperand so; + if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) { + __ adc(out_high, first_high, so); + } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) { + __ sbc(out_high, first_high, so); + } else { + LOG(FATAL) << "Unexpected constant " << value_high; + UNREACHABLE(); + } +} + void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = instruction->GetLocations(); Location first = locations->InAt(0); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index fa7709b9a3..5d9b2dce1c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -183,7 +183,7 @@ class LocationsBuilderARM : public HGraphVisitor { Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* 
input_cst, Opcode opcode); - bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -220,6 +220,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateAndConst(Register out, Register first, uint32_t value); void GenerateOrrConst(Register out, Register first, uint32_t value); void GenerateEorConst(Register out, Register first, uint32_t value); + void GenerateAddLongConst(Location out, Location first, uint64_t value); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ade21174f4..a85cd54ff3 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86 memory model. @@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eadb431440..e0013634f1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. 
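Note on the code_generator_arm.cc change above: 64-bit ADD/SUB may now take an encodable constant as the second operand. A SUB by a constant is flipped into an ADD of the negated constant, the low word uses adds (kCcSet) so the carry feeds the high word, and when `adc` cannot encode the high half, `sbc` with the bitwise complement is emitted instead, because with the same incoming carry ADC(rn, imm) and SBC(rn, ~imm) produce the same result. The standalone sketch below (plain C++, not ART code) checks that identity.

#include <cassert>
#include <cstdint>

// ARM semantics modelled in plain C++.
uint32_t Adc(uint32_t rn, uint32_t op2, uint32_t carry) {
  return rn + op2 + carry;         // ADC: rn + op2 + C
}

uint32_t Sbc(uint32_t rn, uint32_t op2, uint32_t carry) {
  return rn - op2 - (1u - carry);  // SBC: rn - op2 - (1 - C)
}

int main() {
  const uint32_t carries[] = {0u, 1u};
  const uint32_t values[] = {0u, 1u, 0x12345678u, 0x7fffffffu, 0xffffffffu};
  for (uint32_t carry : carries) {
    for (uint32_t rn : values) {
      for (uint32_t imm : values) {
        // SBC with the complemented immediate is interchangeable with ADC.
        assert(Adc(rn, imm, carry) == Sbc(rn, ~imm, carry));
      }
    }
  }
  return 0;
}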
// Note that this is a no-op, thanks to the x86-64 memory model. @@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3429a8fdbb..1a8eb58857 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -27,9 +27,6 @@ namespace art { class CompilerDriver; class DexFile; -// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751 -static constexpr bool kRoundIsPlusPointFive = false; - // Positive floating-point infinities. static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U; static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e7c40e6600..e233672705 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Register str = XRegisterFrom(locations->InAt(0)); - Register arg = XRegisterFrom(locations->InAt(1)); + Register str = InputRegisterAt(invoke, 0); + Register arg = InputRegisterAt(invoke, 1); + DCHECK(str.IsW()); + DCHECK(arg.IsW()); Register out = OutputRegister(invoke); Register temp0 = WRegisterFrom(locations->GetTemp(0)); @@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Subs(out, str, arg); __ B(&end, eq); // Load lengths of this and argument strings. - __ Ldr(temp0, MemOperand(str.X(), count_offset)); - __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); // Return zero if both strings are empty. __ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). __ Bind(&loop); - __ Ldr(temp4, MemOperand(str.X(), temp1)); - __ Ldr(temp0, MemOperand(arg.X(), temp1)); + __ Ldr(temp4, MemOperand(str.X(), temp1.X())); + __ Ldr(temp0, MemOperand(arg.X(), temp1.X())); __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); @@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Clz(temp1, temp1); // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). 
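Note on the Baker read barrier change in code_generator_x86.cc and code_generator_x86_64.cc above: instead of extracting rb_state with a shift and mask and comparing it against gray_ptr_, the generated code now shifts by kReadBarrierStateShift + 1 so that the low bit of rb_state lands in the carry flag, and branches on carry. With the values asserted in the diff (white = 0, gray = 1, black = 2), that low bit is set only for gray. The standalone sketch below (plain C++, not ART code; the shift amount is an assumed example, and the carry flag is modelled as the bit shifted out) checks that both tests agree.

#include <cassert>
#include <cstdint>

constexpr uint32_t kReadBarrierStateShift = 28;  // assumed example bit position
constexpr uint32_t kWhite = 0, kGray = 1, kBlack = 2;

// Old sequence: shift, mask to the two state bits, compare against gray.
bool IsGrayOldWay(uint32_t lock_word) {
  uint32_t rb_state = (lock_word >> kReadBarrierStateShift) & 0x3u;
  return rb_state == kGray;
}

// New sequence: SHR by (shift + 1) pushes the low state bit into the carry
// flag; here that carry is modelled by reading the same bit directly.
bool IsGrayNewWay(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 0x1u) != 0u;
}

int main() {
  const uint32_t states[] = {kWhite, kGray, kBlack};
  for (uint32_t state : states) {
    uint32_t lock_word = state << kReadBarrierStateShift;
    assert(IsGrayOldWay(lock_word) == IsGrayNewWay(lock_word));
  }
  return 0;
}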
- __ Cmp(temp2, Operand(temp1, LSR, 4)); + __ Cmp(temp2, Operand(temp1.W(), LSR, 4)); __ B(le, &end); // Extract the characters and calculate the difference. __ Bic(temp1, temp1, 0xf); __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); __ And(temp4, temp4, 0xffff); - __ Sub(out, temp4, Operand(temp0, UXTH)); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); __ Bind(&end); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index dc409c92d6..22f4181b92 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } - // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); @@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { } XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); - Register constant_area = locations->InAt(1).AsRegister<Register>(); XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); Register out = locations->Out().AsRegister<Register>(); @@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ movss(t2, in); __ roundss(t1, in, Immediate(1)); __ subss(t2, t1); - __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); - __ j(kBelow, &skip_incr); - __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); - __ Bind(&skip_incr); + if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { + // Direct constant area available. + Register constant_area = locations->InAt(1).AsRegister<Register>(); + __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); + __ Bind(&skip_incr); + } else { + // No constant area: go through stack. + __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); + __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); + __ comiss(t2, Address(ESP, 4)); + __ j(kBelow, &skip_incr); + __ addss(t1, Address(ESP, 0)); + __ Bind(&skip_incr); + __ addl(ESP, Immediate(8)); + } // Final conversion to an integer. Unfortunately this also does not have a // direct x86 instruction, since NaN should map to 0 and large positive diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7dfbfb09be..ab8b05c3d4 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } @@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - // See intrinsics.h. 
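Note on the intrinsics changes above: removing the kRoundIsPlusPointFive guard re-enables the x86 and x86-64 Math.round intrinsics, and the x86 float variant gains a fallback path that keeps the 0.5f and 1.0f constants on the stack when no constant area register is available. The rounding scheme itself is unchanged: round toward negative infinity, then add one if the discarded fraction is at least 0.5. The sketch below (plain C++, not the generated assembly; NaN and overflow handling are omitted, as the real code handles them separately) models that scheme.

#include <cassert>
#include <cmath>
#include <cstdint>

int32_t RoundFloatSketch(float in) {
  float down = std::floor(in);        // roundss with immediate 1 (toward -inf)
  float frac = in - down;             // subss
  if (frac >= 0.5f) {                 // comiss against 0.5f
    down += 1.0f;                     // addss 1.0f
  }
  return static_cast<int32_t>(down);  // simplified final conversion
}

int main() {
  assert(RoundFloatSketch(2.4f) == 2);
  assert(RoundFloatSketch(2.5f) == 3);
  assert(RoundFloatSketch(-2.5f) == -2);
  return 0;
}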
- if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 79ca5a0d86..4f043f5d03 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -37,6 +37,158 @@ static constexpr size_t kMaxNumRegs = 32; // intervals are split when coloring fails. static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; +// We always want to avoid spilling inside loops. +static constexpr size_t kLoopSpillWeightMultiplier = 10; + +// If we avoid moves in single jump blocks, we can avoid jumps to jumps. +static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; + +// We avoid moves in blocks that dominate the exit block, since these blocks will +// be executed on every path through the method. +static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; + +enum class CoalesceKind { + kAdjacentSibling, // Prevents moves at interval split points. + kFixedOutputSibling, // Prevents moves from a fixed output location. + kFixedInput, // Prevents moves into a fixed input location. + kNonlinearControlFlow, // Prevents moves between blocks. + kPhi, // Prevents phi resolution moves. + kFirstInput, // Prevents a single input move. + kAnyInput, // May lead to better instruction selection / smaller encodings. +}; + +std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { + return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); +} + +static size_t LoopDepthAt(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + size_t depth = 0; + while (loop_info != nullptr) { + ++depth; + loop_info = loop_info->GetPreHeader()->GetLoopInformation(); + } + return depth; +} + +// Return the runtime cost of inserting a move instruction at the specified location. +static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { + HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); + DCHECK(block != nullptr); + size_t cost = 1; + if (block->IsSingleJump()) { + cost *= kSingleJumpBlockWeightMultiplier; + } + if (block->Dominates(block->GetGraph()->GetExitBlock())) { + cost *= kDominatesExitBlockWeightMultiplier; + } + for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { + cost *= kLoopSpillWeightMultiplier; + } + return cost; +} + +// In general, we estimate coalesce priority by whether it will definitely avoid a move, +// and by how likely it is to create an interference graph that's harder to color. +static size_t ComputeCoalescePriority(CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) { + if (kind == CoalesceKind::kAnyInput) { + // This type of coalescing can affect instruction selection, but not moves, so we + // give it the lowest priority. + return 0; + } else { + return CostForMoveAt(position, liveness); + } +} + +enum class CoalesceStage { + kWorklist, // Currently in the iterative coalescing worklist. + kActive, // Not in a worklist, but could be considered again during iterative coalescing. + kInactive, // No longer considered until last-chance coalescing. + kDefunct, // Either the two nodes interfere, or have already been coalesced. 
+}; + +std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { + return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); +} + +// Represents a coalesce opportunity between two nodes. +struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { + CoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) + : node_a(a), + node_b(b), + stage(CoalesceStage::kWorklist), + priority(ComputeCoalescePriority(kind, position, liveness)) {} + + InterferenceNode* const node_a; + InterferenceNode* const node_b; + + // The current stage of this coalesce opportunity, indicating whether it is in a worklist, + // and whether it should still be considered. + CoalesceStage stage; + + // The priority of this coalesce opportunity, based on heuristics. + const size_t priority; +}; + +enum class NodeStage { + kInitial, // Uninitialized. + kPrecolored, // Marks fixed nodes. + kSafepoint, // Marks safepoint nodes. + kPrunable, // Marks uncolored nodes in the interference graph. + kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. + kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. + kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. + kPruned // Marks nodes already pruned from the interference graph. +}; + +std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { + return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); +} + +// Returns the estimated cost of spilling a particular live interval. +static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { + if (interval->HasRegister()) { + // Intervals with a fixed register cannot be spilled. + return std::numeric_limits<float>::min(); + } + + size_t length = interval->GetLength(); + if (length == 1) { + // Tiny intervals should have maximum priority, since they cannot be split any further. + return std::numeric_limits<float>::max(); + } + + size_t use_weight = 0; + if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { + // Cost for spilling at a register definition point. + use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); + } + + UsePosition* use = interval->GetFirstUse(); + while (use != nullptr && use->GetPosition() <= interval->GetStart()) { + // Skip uses before the start of this live interval. + use = use->GetNext(); + } + + while (use != nullptr && use->GetPosition() <= interval->GetEnd()) { + if (use->GetUser() != nullptr && use->RequiresRegister()) { + // Cost for spilling at a register use point. + use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness); + } + use = use->GetNext(); + } + + // We divide by the length of the interval because we want to prioritize + // short intervals; we do not benefit much if we split them further. + return static_cast<float>(use_weight) / static_cast<float>(length); +} + // Interference nodes make up the interference graph, which is the primary data structure in // graph coloring register allocation. Each node represents a single live interval, and contains // a set of adjacent nodes corresponding to intervals overlapping with its own. 
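Note on the heuristics introduced above for the graph coloring register allocator: the cost of a move starts at 1, is doubled for single-jump blocks and for blocks dominating the exit block, and is multiplied by 10 per loop-nesting level; ComputeSpillWeight then divides the accumulated use cost of an interval by its length, so short, densely used intervals are prioritized for registers. A standalone worked example follows (hypothetical helper mirroring CostForMoveAt, not ART code).

#include <cassert>
#include <cstddef>

size_t MoveCost(size_t loop_depth, bool single_jump_block, bool dominates_exit) {
  size_t cost = 1;
  if (single_jump_block) cost *= 2;                    // kSingleJumpBlockWeightMultiplier
  if (dominates_exit) cost *= 2;                       // kDominatesExitBlockWeightMultiplier
  for (size_t d = loop_depth; d > 0; --d) cost *= 10;  // kLoopSpillWeightMultiplier
  return cost;
}

int main() {
  // A move in a block nested two loops deep that also dominates the exit block.
  assert(MoveCost(2, /*single_jump_block=*/ false, /*dominates_exit=*/ true) == 200);
  return 0;
}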
To save memory, @@ -58,42 +210,70 @@ static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; // and thus whether it is safe to prune it from the interference graph early on. class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { public: - InterferenceNode(ArenaAllocator* allocator, LiveInterval* interval, size_t id) - : interval_(interval), - adjacent_nodes_(CmpPtr, allocator->Adapter(kArenaAllocRegisterAllocator)), - out_degree_(0), - id_(id) {} - - // Used to maintain determinism when storing InterferenceNode pointers in sets. - static bool CmpPtr(const InterferenceNode* lhs, const InterferenceNode* rhs) { - return lhs->id_ < rhs->id_; + InterferenceNode(ArenaAllocator* allocator, + LiveInterval* interval, + size_t id, + const SsaLivenessAnalysis& liveness) + : stage(NodeStage::kInitial), + interval_(interval), + adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)), + out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), + id_(id), + alias_(this), + spill_weight_(ComputeSpillWeight(interval, liveness)), + requires_color_(interval->RequiresRegister()) { + DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; } - void AddInterference(InterferenceNode* other) { - if (adjacent_nodes_.insert(other).second) { + void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) { + DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; + DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; + DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; + DCHECK_NE(stage, NodeStage::kPruned); + DCHECK_NE(other->stage, NodeStage::kPruned); + if (guaranteed_not_interfering_yet) { + DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other) + == adjacent_nodes_.end()); + adjacent_nodes_.push_back(other); out_degree_ += EdgeWeightWith(other); + } else { + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it == adjacent_nodes_.end()) { + adjacent_nodes_.push_back(other); + out_degree_ += EdgeWeightWith(other); + } } } void RemoveInterference(InterferenceNode* other) { - if (adjacent_nodes_.erase(other) > 0) { + DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; + DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it != adjacent_nodes_.end()) { + adjacent_nodes_.erase(it); out_degree_ -= EdgeWeightWith(other); } } bool ContainsInterference(InterferenceNode* other) const { - return adjacent_nodes_.count(other) > 0; + DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; + DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + return it != adjacent_nodes_.end(); } LiveInterval* GetInterval() const { return interval_; } - const ArenaSet<InterferenceNode*, decltype(&CmpPtr)>& GetAdjacentNodes() const { + const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const { return adjacent_nodes_; } size_t GetOutDegree() const { + // Pre-colored nodes have infinite degree. 
+ DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max()); return out_degree_; } @@ -101,41 +281,109 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { return id_; } - private: + void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) { + coalesce_opportunities_.push_back(opportunity); + } + + void ClearCoalesceOpportunities() { + coalesce_opportunities_.clear(); + } + + bool IsMoveRelated() const { + for (CoalesceOpportunity* opportunity : coalesce_opportunities_) { + if (opportunity->stage == CoalesceStage::kWorklist || + opportunity->stage == CoalesceStage::kActive) { + return true; + } + } + return false; + } + + // Return whether this node already has a color. + // Used to find fixed nodes in the interference graph before coloring. + bool IsPrecolored() const { + return interval_->HasRegister(); + } + + bool IsPair() const { + return interval_->HasHighInterval(); + } + + void SetAlias(InterferenceNode* rep) { + DCHECK_NE(rep->stage, NodeStage::kPruned); + DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; + alias_ = rep; + } + + InterferenceNode* GetAlias() { + if (alias_ != this) { + // Recurse in order to flatten tree of alias pointers. + alias_ = alias_->GetAlias(); + } + return alias_; + } + + const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const { + return coalesce_opportunities_; + } + + float GetSpillWeight() const { + return spill_weight_; + } + + bool RequiresColor() const { + return requires_color_; + } + // We give extra weight to edges adjacent to pair nodes. See the general comment on the // interference graph above. - size_t EdgeWeightWith(InterferenceNode* other) const { - return (interval_->HasHighInterval() || other->interval_->HasHighInterval()) ? 2 : 1; + size_t EdgeWeightWith(const InterferenceNode* other) const { + return (IsPair() || other->IsPair()) ? 2 : 1; } + // The current stage of this node, indicating which worklist it belongs to. + NodeStage stage; + + private: // The live interval that this node represents. LiveInterval* const interval_; // All nodes interfering with this one. - // TODO: There is potential to use a cheaper data structure here, especially since - // adjacency sets will usually be small. - ArenaSet<InterferenceNode*, decltype(&CmpPtr)> adjacent_nodes_; + // We use an unsorted vector as a set, since a tree or hash set is too heavy for the + // set sizes that we encounter. Using a vector leads to much better performance. + ArenaVector<InterferenceNode*> adjacent_nodes_; + + // Interference nodes that this node should be coalesced with to reduce moves. + ArenaVector<CoalesceOpportunity*> coalesce_opportunities_; // The maximum number of colors with which this node could interfere. This could be more than // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. // We use "out" degree because incoming edges come from nodes already pruned from the graph, // and do not affect the coloring of this node. + // Pre-colored nodes are treated as having infinite degree. size_t out_degree_; // A unique identifier for this node, used to maintain determinism when storing // interference nodes in sets. const size_t id_; - // TODO: We could cache the result of interval_->RequiresRegister(), since it - // will not change for the lifetime of this node. (Currently, RequiresRegister() requires - // iterating through all uses of a live interval.) + // The node representing this node in the interference graph. 
+ // Initially set to `this`, and only changed if this node is coalesced into another. + InterferenceNode* alias_; + + // The cost of splitting and spilling this interval to the stack. + // Nodes with a higher spill weight should be prioritized when assigning registers. + // This is essentially based on use density and location; short intervals with many uses inside + // deeply nested loops have a high spill weight. + const float spill_weight_; + + const bool requires_color_; DISALLOW_COPY_AND_ASSIGN(InterferenceNode); }; static bool IsCoreInterval(LiveInterval* interval) { - return interval->GetType() != Primitive::kPrimFloat - && interval->GetType() != Primitive::kPrimDouble; + return !Primitive::IsFloatingPointType(interval->GetType()); } static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { @@ -144,14 +392,16 @@ static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator, CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness) + const SsaLivenessAnalysis& liveness, + bool iterative_move_coalescing) : RegisterAllocator(allocator, codegen, liveness), + iterative_move_coalescing_(iterative_move_coalescing), core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), int_spill_slot_counter_(0), double_spill_slot_counter_(0), float_spill_slot_counter_(0), @@ -163,16 +413,27 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat number_of_globally_blocked_fp_regs_(0), max_safepoint_live_core_regs_(0), max_safepoint_live_fp_regs_(0), - coloring_attempt_allocator_(nullptr) { + coloring_attempt_allocator_(nullptr), + node_id_counter_(0), + interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), + prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), + freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), + spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_worklist_(CmpCoalesceOpportunity, + allocator->Adapter(kArenaAllocRegisterAllocator)) { // Before we ask for blocked registers, set them up in the code generator. codegen->SetupBlockedRegisters(); // Initialize physical core register live intervals and blocked registers. // This includes globally blocked registers, such as the stack pointer. 
- physical_core_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen->GetNumberOfCoreRegisters(); ++i) { + physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt); - physical_core_intervals_[i] = interval; + physical_core_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, node_id_counter_++, liveness); + physical_core_nodes_[i]->stage = NodeStage::kPrecolored; core_intervals_.push_back(interval); if (codegen_->IsBlockedCoreRegister(i)) { ++number_of_globally_blocked_core_regs_; @@ -180,10 +441,12 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat } } // Initialize physical floating point register live intervals and blocked registers. - physical_fp_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen->GetNumberOfFloatingPointRegisters(); ++i) { + physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat); - physical_fp_intervals_[i] = interval; + physical_fp_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, node_id_counter_++, liveness); + physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; fp_intervals_.push_back(interval); if (codegen_->IsBlockedFloatingPointRegister(i)) { ++number_of_globally_blocked_fp_regs_; @@ -213,24 +476,58 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { << "which could be caused by prioritizing the wrong live intervals. (Short intervals " << "should be prioritized over long ones, because they cannot be split further.)"; - // Reset the allocator for the next coloring attempt. + // Reset the allocator and fixed nodes for the next coloring attempt. ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool()); coloring_attempt_allocator_ = &coloring_attempt_allocator; + for (InterferenceNode* node : physical_core_nodes_) { + node->ClearCoalesceOpportunities(); + } + for (InterferenceNode* node : physical_fp_nodes_) { + node->ClearCoalesceOpportunities(); + } - // (2) Build the interference graph. - ArenaVector<InterferenceNode*> prunable_nodes( + // Clear data structures. + // TODO: One alternative idea is to create a separate struct that contains all of these + // data structures, and is passed around to each graph coloring function. 
+ interval_node_map_ = ArenaHashMap<LiveInterval*, InterferenceNode*>( + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + prunable_nodes_ = ArenaVector<InterferenceNode*>( + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + pruned_nodes_ = ArenaStdStack<InterferenceNode*>( coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + simplify_worklist_ = ArenaDeque<InterferenceNode*>( + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + freeze_worklist_ = ArenaDeque<InterferenceNode*>( + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + spill_worklist_ = ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)>( + HasGreaterNodePriority, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + coalesce_worklist_ = + ArenaPriorityQueue<CoalesceOpportunity*, decltype(&CmpCoalesceOpportunity)>( + CmpCoalesceOpportunity, + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // (2) Build the interference graph. Also gather safepoints and build the interval node map. ArenaVector<InterferenceNode*> safepoints( coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - BuildInterferenceGraph(intervals, &prunable_nodes, &safepoints); + ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ? physical_core_nodes_ + : physical_fp_nodes_; + BuildInterferenceGraph(intervals, physical_nodes, &safepoints); + + // (3) Add coalesce opportunities. + // If we have tried coloring the graph a suspiciously high number of times, give + // up on move coalescing, just in case the coalescing heuristics are not conservative. + // (This situation will be caught if DCHECKs are turned on.) + if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { + FindCoalesceOpportunities(); + } - // (3) Prune all uncolored nodes from interference graph. - ArenaStdStack<InterferenceNode*> pruned_nodes( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - PruneInterferenceGraph(prunable_nodes, num_registers, &pruned_nodes); + // (4) Prune all uncolored nodes from interference graph. + PruneInterferenceGraph(num_registers); - // (4) Color pruned nodes based on interferences. - bool successful = ColorInterferenceGraph(&pruned_nodes, num_registers); + // (5) Color pruned nodes based on interferences. + bool successful = ColorInterferenceGraph(num_registers, + processing_core_regs); if (successful) { // Compute the maximum number of live registers across safepoints. @@ -250,7 +547,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { // We only look at prunable_nodes because we already told the code generator about // fixed intervals while processing instructions. We also ignore the fixed intervals // placed at the top of catch blocks. - for (InterferenceNode* node : prunable_nodes) { + for (InterferenceNode* node : prunable_nodes_) { LiveInterval* interval = node->GetInterval(); if (interval->HasRegister()) { Location low_reg = processing_core_regs @@ -275,7 +572,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { } // while unsuccessful } // for processing_core_instructions - // (5) Resolve locations and deconstruct SSA form. + // (6) Resolve locations and deconstruct SSA form. 
RegisterAllocationResolver(allocator_, codegen_, liveness_) .Resolve(max_safepoint_live_core_regs_, max_safepoint_live_fp_regs_, @@ -304,11 +601,12 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { } } - ArenaVector<LiveInterval*>& physical_intervals = processing_core_regs - ? physical_core_intervals_ - : physical_fp_intervals_; - for (LiveInterval* fixed : physical_intervals) { - if (fixed->GetFirstRange() != nullptr) { + ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ? physical_core_nodes_ + : physical_fp_nodes_; + for (InterferenceNode* fixed : physical_nodes) { + LiveInterval* interval = fixed->GetInterval(); + if (interval->GetFirstRange() != nullptr) { // Ideally we would check fixed ranges as well, but currently there are times when // two fixed intervals for the same register will overlap. For example, a fixed input // and a fixed output may sometimes share the same register, in which there will be two @@ -358,7 +656,8 @@ void RegisterAllocatorGraphColor::ProcessInstructions() { ProcessInstruction(phi_it.Current()); } - if (block->IsCatchBlock() || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { + if (block->IsCatchBlock() + || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { // By blocking all registers at the top of each catch block or irreducible loop, we force // intervals belonging to the live-in set of the catch/header block to be spilled. // TODO(ngeoffray): Phis in this block could be allocated in register. @@ -435,7 +734,9 @@ void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) // TODO: Ideally we would coalesce the physical register with the register // allocated to the input value, but this can be tricky if, e.g., there // could be multiple physical register uses of the same value at the - // same instruction. Need to think about it more. + // same instruction. Furthermore, there's currently no distinction between + // fixed inputs to a call (which will be clobbered) and other fixed inputs (which + // may not be clobbered). LocationSummary* locations = instruction->GetLocations(); size_t position = instruction->GetLifetimePosition(); for (size_t i = 0; i < locations->GetInputCount(); ++i) { @@ -639,8 +940,8 @@ void RegisterAllocatorGraphColor::BlockRegister(Location location, DCHECK(location.IsRegister() || location.IsFpuRegister()); int reg = location.reg(); LiveInterval* interval = location.IsRegister() - ? physical_core_intervals_[reg] - : physical_fp_intervals_[reg]; + ? physical_core_nodes_[reg]->GetInterval() + : physical_fp_nodes_[reg]->GetInterval(); DCHECK(interval->GetRegister() == reg); bool blocked_by_codegen = location.IsRegister() ? codegen_->IsBlockedCoreRegister(reg) @@ -666,28 +967,104 @@ void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool } } -// Add an interference edge, but only if necessary. -static void AddPotentialInterference(InterferenceNode* from, InterferenceNode* to) { - if (from->GetInterval()->HasRegister()) { +void RegisterAllocatorGraphColor::AddPotentialInterference(InterferenceNode* from, + InterferenceNode* to, + bool guaranteed_not_interfering_yet, + bool both_directions) { + if (from->IsPrecolored()) { // We save space by ignoring outgoing edges from fixed nodes. } else if (to->GetInterval()->IsSlowPathSafepoint()) { // Safepoint intervals are only there to count max live registers, // so no need to give them incoming interference edges. 
// This is also necessary for correctness, because we don't want nodes // to remove themselves from safepoint adjacency sets when they're pruned. + } else if (to->IsPrecolored()) { + // It is important that only a single node represents a given fixed register in the + // interference graph. We retrieve that node here. + const ArenaVector<InterferenceNode*>& physical_nodes = + to->GetInterval()->IsFloatingPoint() ? physical_fp_nodes_ : physical_core_nodes_; + InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; + from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false); + DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); + DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; + + // If a node interferes with a fixed pair node, the weight of the edge may + // be inaccurate after using the alias of the pair node, because the alias of the pair node + // is a singular node. + // We could make special pair fixed nodes, but that ends up being too conservative because + // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of + // three rather than two. + // Instead, we explicitly add an interference with the high node of the fixed pair node. + // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals + // can be unaligned on x86 complicates things. + if (to->IsPair()) { + InterferenceNode* high_node = + physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; + DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), + high_node->GetInterval()->GetRegister()); + from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false); + } } else { - from->AddInterference(to); + // Standard interference between two uncolored nodes. + from->AddInterference(to, guaranteed_not_interfering_yet); + } + + if (both_directions) { + AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); } } -// TODO: See locations->OutputCanOverlapWithInputs(); we may want to consider -// this when building the interference graph. +// Returns true if `in_node` represents an input interval of `out_node`, and the output interval +// is allowed to have the same register as the input interval. +// TODO: Ideally we should just produce correct intervals in liveness analysis. +// We would need to refactor the current live interval layout to do so, which is +// no small task. +static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { + LiveInterval* output_interval = out_node->GetInterval(); + HInstruction* defined_by = output_interval->GetDefinedBy(); + if (defined_by == nullptr) { + // This must not be a definition point. + return false; + } + + LocationSummary* locations = defined_by->GetLocations(); + if (locations->OutputCanOverlapWithInputs()) { + // This instruction does not allow the output to reuse a register from an input. + return false; + } + + LiveInterval* input_interval = in_node->GetInterval(); + LiveInterval* next_sibling = input_interval->GetNextSibling(); + size_t def_position = defined_by->GetLifetimePosition(); + size_t use_position = def_position + 1; + if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { + // The next sibling starts at the use position, so reusing the input register in the output + // would clobber the input before it's moved into the sibling interval location. 
+ return false; + } + + if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { + // The input interval is live after the use position. + return false; + } + + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { + DCHECK(input_interval->SameRegisterKind(*output_interval)); + return true; + } + } + + // The input interval was not an input for this instruction. + return false; +} + void RegisterAllocatorGraphColor::BuildInterferenceGraph( const ArenaVector<LiveInterval*>& intervals, - ArenaVector<InterferenceNode*>* prunable_nodes, + const ArenaVector<InterferenceNode*>& physical_nodes, ArenaVector<InterferenceNode*>* safepoints) { - size_t interval_id_counter = 0; - + DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. (That is, we // jump from endpoint to endpoint, maintaining a set of intervals live at each @@ -702,20 +1079,33 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( // Tuple contents: (position, is_range_beginning, node). ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // We reserve plenty of space to avoid excessive copying. + range_endpoints.reserve(4 * prunable_nodes_.size()); + for (LiveInterval* parent : intervals) { for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { LiveRange* range = sibling->GetFirstRange(); if (range != nullptr) { InterferenceNode* node = new (coloring_attempt_allocator_) InterferenceNode( - coloring_attempt_allocator_, sibling, interval_id_counter++); + coloring_attempt_allocator_, sibling, node_id_counter_++, liveness_); + interval_node_map_.Insert(std::make_pair(sibling, node)); + if (sibling->HasRegister()) { - // Fixed nodes will never be pruned, so no need to keep track of them. + // Fixed nodes should alias the canonical node for the corresponding register. + node->stage = NodeStage::kPrecolored; + InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; + node->SetAlias(physical_node); + DCHECK_EQ(node->GetInterval()->GetRegister(), + physical_node->GetInterval()->GetRegister()); } else if (sibling->IsSlowPathSafepoint()) { // Safepoint intervals are synthesized to count max live registers. // They will be processed separately after coloring. + node->stage = NodeStage::kSafepoint; safepoints->push_back(node); } else { - prunable_nodes->push_back(node); + node->stage = NodeStage::kPrunable; + prunable_nodes_.push_back(node); } while (range != nullptr) { @@ -728,11 +1118,18 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( } // Sort the endpoints. - std::sort(range_endpoints.begin(), range_endpoints.end()); + // We explicitly ignore the third entry of each tuple (the node pointer) in order + // to maintain determinism. + std::sort(range_endpoints.begin(), range_endpoints.end(), + [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, + const std::tuple<size_t, bool, InterferenceNode*>& rhs) { + return std::tie(std::get<0>(lhs), std::get<1>(lhs)) + < std::tie(std::get<0>(rhs), std::get<1>(rhs)); + }); // Nodes live at the current position in the linear sweep. 
- ArenaSet<InterferenceNode*, decltype(&InterferenceNode::CmpPtr)> live( - InterferenceNode::CmpPtr, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<InterferenceNode*> live( + coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the // live set. When we encounter the end of a range, we remove the corresponding node @@ -740,133 +1137,539 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { bool is_range_beginning; InterferenceNode* node; + size_t position; // Extract information from the tuple, including the node this tuple represents. - std::tie(std::ignore, is_range_beginning, node) = *it; + std::tie(position, is_range_beginning, node) = *it; if (is_range_beginning) { + bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); for (InterferenceNode* conflicting : live) { DCHECK_NE(node, conflicting); - AddPotentialInterference(node, conflicting); - AddPotentialInterference(conflicting, node); + if (CheckInputOutputCanOverlap(conflicting, node)) { + // We do not add an interference, because the instruction represented by `node` allows + // its output to share a register with an input, represented here by `conflicting`. + } else { + AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); + } } - DCHECK_EQ(live.count(node), 0u); - live.insert(node); + DCHECK(std::find(live.begin(), live.end(), node) == live.end()); + live.push_back(node); } else { // End of range. - DCHECK_EQ(live.count(node), 1u); - live.erase(node); + auto live_it = std::find(live.begin(), live.end(), node); + DCHECK(live_it != live.end()); + live.erase(live_it); } } DCHECK(live.empty()); } -// The order in which we color nodes is vital to both correctness (forward -// progress) and code quality. Specifically, we must prioritize intervals -// that require registers, and after that we must prioritize short intervals. -// That way, if we fail to color a node, it either won't require a register, -// or it will be a long interval that can be split in order to make the +void RegisterAllocatorGraphColor::CreateCoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position) { + DCHECK_EQ(a->IsPair(), b->IsPair()) + << "Nodes of different memory widths should never be coalesced"; + CoalesceOpportunity* opportunity = + new (coloring_attempt_allocator_) CoalesceOpportunity(a, b, kind, position, liveness_); + a->AddCoalesceOpportunity(opportunity); + b->AddCoalesceOpportunity(opportunity); + coalesce_worklist_.push(opportunity); +} + +// When looking for coalesce opportunities, we use the interval_node_map_ to find the node +// corresponding to an interval. Note that not all intervals are in this map, notably the parents +// of constants and stack arguments. (However, these interval should not be involved in coalesce +// opportunities anyway, because they're not going to be in registers.) +void RegisterAllocatorGraphColor::FindCoalesceOpportunities() { + DCHECK(coalesce_worklist_.empty()); + + for (InterferenceNode* node : prunable_nodes_) { + LiveInterval* interval = node->GetInterval(); + + // Coalesce siblings. 
+ LiveInterval* next_sibling = interval->GetNextSibling(); + if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { + auto it = interval_node_map_.Find(next_sibling); + if (it != interval_node_map_.end()) { + InterferenceNode* sibling_node = it->second; + CreateCoalesceOpportunity(node, + sibling_node, + CoalesceKind::kAdjacentSibling, + interval->GetEnd()); + } + } + + // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. + LiveInterval* parent = interval->GetParent(); + if (parent->HasRegister() + && parent->GetNextSibling() == interval + && parent->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(parent); + if (it != interval_node_map_.end()) { + InterferenceNode* parent_node = it->second; + CreateCoalesceOpportunity(node, + parent_node, + CoalesceKind::kFixedOutputSibling, + parent->GetEnd()); + } + } + + // Try to prevent moves across blocks. + // Note that this does not lead to many succeeding coalesce attempts, so could be removed + // if found to add to compile time. + if (interval->IsSplit() && liveness_.IsAtBlockBoundary(interval->GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. This can avoid a move between the two blocks. + HBasicBlock* block = liveness_.GetBlockFromPosition(interval->GetStart() / 2); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + size_t position = predecessor->GetLifetimeEnd() - 1; + LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); + if (existing != nullptr) { + auto it = interval_node_map_.Find(existing); + if (it != interval_node_map_.end()) { + InterferenceNode* existing_node = it->second; + CreateCoalesceOpportunity(node, + existing_node, + CoalesceKind::kNonlinearControlFlow, + position); + } + } + } + } + + // Coalesce phi inputs with the corresponding output. + HInstruction* defined_by = interval->GetDefinedBy(); + if (defined_by != nullptr && defined_by->IsPhi()) { + const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors(); + HInputsRef inputs = defined_by->GetInputs(); + + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + // We want the sibling at the end of the appropriate predecessor block. + size_t position = predecessors[i]->GetLifetimeEnd() - 1; + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); + + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); + } + } + } + + // Coalesce output with first input when policy is kSameAsFirstInput. + if (defined_by != nullptr) { + Location out = defined_by->GetLocations()->Out(); + if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { + LiveInterval* input_interval + = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); + // TODO: Could we consider lifetime holes here? 
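The kSameAsFirstInput policy handled in this block comes from two-address style instructions, where a codegen declares that the output must reuse the first input's register. A hedged sketch of how a locations builder typically states that constraint; the Location policies and LocationSummary pattern are the real optimizing-compiler API, while LocationsBuilderX and HFoo are placeholder names:

    void LocationsBuilderX::VisitFoo(HFoo* foo) {
      LocationSummary* locations =
          new (GetGraph()->GetArena()) LocationSummary(foo, LocationSummary::kNoCall);
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      // Two-address form: the output is pinned to the first input's register,
      // which is exactly the case turned into a CoalesceKind::kFirstInput
      // opportunity at the interval's start.
      locations->SetOut(Location::SameAsFirstInput());
    }

Coalescing the output node with the sibling of the first input that ends at the interval's start satisfies the constraint without an extra move.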
+ if (input_interval->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kFirstInput, + interval->GetStart()); + } + } + } + } + + // An interval that starts an instruction (that is, it is not split), may + // re-use the registers used by the inputs of that instruction, based on the + // location summary. + if (defined_by != nullptr) { + DCHECK(!interval->IsSplit()); + LocationSummary* locations = defined_by->GetLocations(); + if (!locations->OutputCanOverlapWithInputs()) { + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + size_t def_point = defined_by->GetLifetimePosition(); + // TODO: Getting the sibling at the def_point might not be quite what we want + // for fixed inputs, since the use will be *at* the def_point rather than after. + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); + if (input_interval != nullptr && + input_interval->HasHighInterval() == interval->HasHighInterval()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kAnyInput, + interval->GetStart()); + } + } + } + } + } + + // Try to prevent moves into fixed input locations. + UsePosition* use = interval->GetFirstUse(); + for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) { + // Skip past uses before the start of this interval. + } + for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) { + HInstruction* user = use->GetUser(); + if (user == nullptr) { + // User may be null for certain intervals, such as temp intervals. + continue; + } + LocationSummary* locations = user->GetLocations(); + Location input = locations->InAt(use->GetInputIndex()); + if (input.IsRegister() || input.IsFpuRegister()) { + // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes + // is currently not supported. + InterferenceNode* fixed_node = input.IsRegister() + ? physical_core_nodes_[input.reg()] + : physical_fp_nodes_[input.reg()]; + CreateCoalesceOpportunity(node, + fixed_node, + CoalesceKind::kFixedInput, + user->GetLifetimePosition()); + } + } + } // for node in prunable_nodes +} + +// The order in which we color nodes is important. To guarantee forward progress, +// we prioritize intervals that require registers, and after that we prioritize +// short intervals. That way, if we fail to color a node, it either won't require a +// register, or it will be a long interval that can be split in order to make the // interference graph sparser. +// To improve code quality, we prioritize intervals used frequently in deeply nested loops. +// (This metric is secondary to the forward progress requirements above.) // TODO: May also want to consider: -// - Loop depth // - Constants (since they can be rematerialized) // - Allocated spill slots -static bool GreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - LiveInterval* lhs_interval = lhs->GetInterval(); - LiveInterval* rhs_interval = rhs->GetInterval(); - - // (1) Choose the interval that requires a register. 
- if (lhs_interval->RequiresRegister() != rhs_interval->RequiresRegister()) { - return lhs_interval->RequiresRegister(); +bool RegisterAllocatorGraphColor::HasGreaterNodePriority(const InterferenceNode* lhs, + const InterferenceNode* rhs) { + // (1) Prioritize the node that requires a color. + if (lhs->RequiresColor() != rhs->RequiresColor()) { + return lhs->RequiresColor(); } - // (2) Choose the interval that has a shorter life span. - if (lhs_interval->GetLength() != rhs_interval->GetLength()) { - return lhs_interval->GetLength() < rhs_interval->GetLength(); - } + // (2) Prioritize the interval that has a higher spill weight. + return lhs->GetSpillWeight() > rhs->GetSpillWeight(); +} + +bool RegisterAllocatorGraphColor::CmpCoalesceOpportunity(const CoalesceOpportunity* lhs, + const CoalesceOpportunity* rhs) { + return lhs->priority < rhs->priority; +} + +static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { + return node->GetOutDegree() < num_regs; +} - // (3) Just choose the interval based on a deterministic ordering. - return InterferenceNode::CmpPtr(lhs, rhs); +static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { + return !IsLowDegreeNode(node, num_regs); } -void RegisterAllocatorGraphColor::PruneInterferenceGraph( - const ArenaVector<InterferenceNode*>& prunable_nodes, - size_t num_regs, - ArenaStdStack<InterferenceNode*>* pruned_nodes) { +void RegisterAllocatorGraphColor::PruneInterferenceGraph(size_t num_regs) { + DCHECK(pruned_nodes_.empty() + && simplify_worklist_.empty() + && freeze_worklist_.empty() + && spill_worklist_.empty()); // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, // and all others as high degree nodes. The distinction is important: low degree nodes are // guaranteed a color, while high degree nodes are not. - // Low-degree nodes are guaranteed a color, so worklist order does not matter. - ArenaDeque<InterferenceNode*> low_degree_worklist( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // If we have to prune from the high-degree worklist, we cannot guarantee - // the pruned node a color. So, we order the worklist by priority. - ArenaSet<InterferenceNode*, decltype(&GreaterNodePriority)> high_degree_worklist( - GreaterNodePriority, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Build worklists. - for (InterferenceNode* node : prunable_nodes) { - DCHECK(!node->GetInterval()->HasRegister()) - << "Fixed nodes should never be pruned"; - DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) - << "Safepoint nodes should never be pruned"; - if (node->GetOutDegree() < num_regs) { - low_degree_worklist.push_back(node); - } else { - high_degree_worklist.insert(node); - } - } - - // Helper function to prune an interval from the interference graph, - // which includes updating the worklists. - auto prune_node = [this, - num_regs, - &pruned_nodes, - &low_degree_worklist, - &high_degree_worklist] (InterferenceNode* node) { - DCHECK(!node->GetInterval()->HasRegister()); - pruned_nodes->push(node); - for (InterferenceNode* adjacent : node->GetAdjacentNodes()) { - DCHECK(!adjacent->GetInterval()->IsSlowPathSafepoint()) - << "Nodes should never interfere with synthesized safepoint nodes"; - if (adjacent->GetInterval()->HasRegister()) { - // No effect on pre-colored nodes; they're never pruned. + // Build worklists. Note that the coalesce worklist has already been + // filled by FindCoalesceOpportunities(). 
+ for (InterferenceNode* node : prunable_nodes_) { + DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; + DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned"; + if (IsLowDegreeNode(node, num_regs)) { + if (node->GetCoalesceOpportunities().empty()) { + // Simplify Worklist. + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); } else { - bool was_high_degree = adjacent->GetOutDegree() >= num_regs; - DCHECK(adjacent->ContainsInterference(node)) - << "Missing incoming interference edge from non-fixed node"; - adjacent->RemoveInterference(node); - if (was_high_degree && adjacent->GetOutDegree() < num_regs) { - // This is a transition from high degree to low degree. - DCHECK_EQ(high_degree_worklist.count(adjacent), 1u); - high_degree_worklist.erase(adjacent); - low_degree_worklist.push_back(adjacent); - } + // Freeze Worklist. + node->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(node); } + } else { + // Spill worklist. + node->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(node); } - }; + } // Prune graph. - while (!low_degree_worklist.empty() || !high_degree_worklist.empty()) { - while (!low_degree_worklist.empty()) { - InterferenceNode* node = low_degree_worklist.front(); - // TODO: pop_back() should work as well, but it doesn't; we get a + // Note that we do not remove a node from its current worklist if it moves to another, so it may + // be in multiple worklists at once; the node's `phase` says which worklist it is really in. + while (true) { + if (!simplify_worklist_.empty()) { + // Prune low-degree nodes. + // TODO: pop_back() should work as well, but it didn't; we get a // failed check while pruning. We should look into this. - low_degree_worklist.pop_front(); - prune_node(node); - } - if (!high_degree_worklist.empty()) { - // We prune the lowest-priority node, because pruning a node earlier + InterferenceNode* node = simplify_worklist_.front(); + simplify_worklist_.pop_front(); + DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; + DCHECK_LT(node->GetOutDegree(), num_regs) << "Nodes in simplify list should be low degree"; + DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; + PruneNode(node, num_regs); + } else if (!coalesce_worklist_.empty()) { + // Coalesce. + CoalesceOpportunity* opportunity = coalesce_worklist_.top(); + coalesce_worklist_.pop(); + if (opportunity->stage == CoalesceStage::kWorklist) { + Coalesce(opportunity, num_regs); + } + } else if (!freeze_worklist_.empty()) { + // Freeze moves and prune a low-degree move-related node. + InterferenceNode* node = freeze_worklist_.front(); + freeze_worklist_.pop_front(); + if (node->stage == NodeStage::kFreezeWorklist) { + DCHECK_LT(node->GetOutDegree(), num_regs) << "Nodes in freeze list should be low degree"; + DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; + FreezeMoves(node, num_regs); + PruneNode(node, num_regs); + } + } else if (!spill_worklist_.empty()) { + // We spill the lowest-priority node, because pruning a node earlier // gives it a higher chance of being spilled. 
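Because spill_worklist_ is a priority queue ordered by HasGreaterNodePriority, its top() is the node the comparator ranks lowest, which is what makes the pop below select the lowest-priority node first. A tiny standalone illustration of that inversion using std::priority_queue (ArenaPriorityQueue is the arena-backed equivalent; plain ints stand in for nodes):

    #include <queue>
    #include <vector>

    // With "has greater priority" used as the ordering, the highest-priority
    // element sinks to the bottom, so top() yields the LOWEST-priority element --
    // the one we are most willing to prune early and therefore risk spilling.
    bool HasGreaterPriority(int lhs, int rhs) { return lhs > rhs; }

    int LowestPriorityFirst() {
      std::priority_queue<int, std::vector<int>, decltype(&HasGreaterPriority)> worklist(
          &HasGreaterPriority);
      worklist.push(3);
      worklist.push(1);
      worklist.push(2);
      return worklist.top();  // 1: popped (and pruned) before 2 and 3.
    }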
- InterferenceNode* node = *high_degree_worklist.rbegin(); - high_degree_worklist.erase(node); - prune_node(node); + InterferenceNode* node = spill_worklist_.top(); + spill_worklist_.pop(); + if (node->stage == NodeStage::kSpillWorklist) { + DCHECK_GE(node->GetOutDegree(), num_regs) << "Nodes in spill list should be high degree"; + FreezeMoves(node, num_regs); + PruneNode(node, num_regs); + } + } else { + // Pruning complete. + break; + } + } + DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); +} + +void RegisterAllocatorGraphColor::EnableCoalesceOpportunities(InterferenceNode* node) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kActive) { + opportunity->stage = CoalesceStage::kWorklist; + coalesce_worklist_.push(opportunity); + } + } +} + +void RegisterAllocatorGraphColor::PruneNode(InterferenceNode* node, + size_t num_regs) { + DCHECK_NE(node->stage, NodeStage::kPruned); + DCHECK(!node->IsPrecolored()); + node->stage = NodeStage::kPruned; + pruned_nodes_.push(node); + + for (InterferenceNode* adj : node->GetAdjacentNodes()) { + DCHECK(!adj->GetInterval()->IsSlowPathSafepoint()) + << "Nodes should never interfere with synthesized safepoint nodes"; + DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; + + if (adj->IsPrecolored()) { + // No effect on pre-colored nodes; they're never pruned. + } else { + // Remove the interference. + bool was_high_degree = IsHighDegreeNode(adj, num_regs); + DCHECK(adj->ContainsInterference(node)) + << "Missing reflexive interference from non-fixed node"; + adj->RemoveInterference(node); + + // Handle transitions from high degree to low degree. + if (was_high_degree && IsLowDegreeNode(adj, num_regs)) { + EnableCoalesceOpportunities(adj); + for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { + EnableCoalesceOpportunities(adj_adj); + } + + DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); + if (adj->IsMoveRelated()) { + adj->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(adj); + } else { + adj->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(adj); + } + } + } + } +} + +void RegisterAllocatorGraphColor::CheckTransitionFromFreezeWorklist(InterferenceNode* node, + size_t num_regs) { + if (IsLowDegreeNode(node, num_regs) && !node->IsMoveRelated()) { + DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); + } +} + +void RegisterAllocatorGraphColor::FreezeMoves(InterferenceNode* node, + size_t num_regs) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + // Constrained moves should remain constrained, since they will not be considered + // during last-chance coalescing. + } else { + opportunity->stage = CoalesceStage::kInactive; + } + InterferenceNode* other = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias() + : opportunity->node_a->GetAlias(); + if (other != node && other->stage == NodeStage::kFreezeWorklist) { + DCHECK(IsLowDegreeNode(node, num_regs)); + CheckTransitionFromFreezeWorklist(other, num_regs); } } } +bool RegisterAllocatorGraphColor::PrecoloredHeuristic(InterferenceNode* from, + InterferenceNode* into, + size_t num_regs) { + if (!into->IsPrecolored()) { + // The uncolored heuristic will cover this case. 
+ return false; + } + if (from->IsPair() || into->IsPair()) { + // TODO: Merging from a pair node is currently not supported, since fixed pair nodes + // are currently represented as two single fixed nodes in the graph, and `into` is + // only one of them. (We may lose the implicit connections to the second one in a merge.) + return false; + } + + // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. + // Reasons an adjacent node `adj` can be "ok": + // (1) If `adj` is low degree, interference with `into` will not affect its existing + // colorable guarantee. (Notice that coalescing cannot increase its degree.) + // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). + // (3) If there's already an interference with `into`, coalescing will not add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsLowDegreeNode(adj, num_regs) || adj->IsPrecolored() || adj->ContainsInterference(into)) { + // Ok. + } else { + return false; + } + } + return true; +} + +bool RegisterAllocatorGraphColor::UncoloredHeuristic(InterferenceNode* from, + InterferenceNode* into, + size_t num_regs) { + if (into->IsPrecolored()) { + // The pre-colored heuristic will handle this case. + return false; + } + + // Arbitrary cap to improve compile time. Tests show that this has negligible affect + // on generated code. + if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs) { + return false; + } + + // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors + // of high degree. (Low degree neighbors can be ignored, because they will eventually be + // pruned from the interference graph in the simplify stage.) + size_t high_degree_interferences = 0; + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs)) { + high_degree_interferences += from->EdgeWeightWith(adj); + } + } + for (InterferenceNode* adj : into->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs)) { + if (from->ContainsInterference(adj)) { + // We've already counted this adjacent node. + // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that + // we should not have counted it at all. (This extends the textbook Briggs coalescing test, + // but remains conservative.) + if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs) { + high_degree_interferences -= from->EdgeWeightWith(adj); + } + } else { + high_degree_interferences += into->EdgeWeightWith(adj); + } + } + } + + return high_degree_interferences < num_regs; +} + +void RegisterAllocatorGraphColor::Combine(InterferenceNode* from, + InterferenceNode* into, + size_t num_regs) { + from->SetAlias(into); + + // Add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + bool was_low_degree = IsLowDegreeNode(adj, num_regs); + AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); + if (was_low_degree && IsHighDegreeNode(adj, num_regs)) { + // This is a (temporary) transition to a high degree node. Its degree will decrease again + // when we prune `from`, but it's best to be consistent about the current worklist. + adj->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(adj); + } + } + + // Add coalesce opportunities. 
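The loop after this aside carries from's surviving coalesce opportunities over to the merged node. Before that, it is worth spelling out why the two tests above are conservative: a merge is only accepted when the combined node is still guaranteed to leave the graph through the simplify stage, so coalescing can never cause a spill that would not have happened anyway. A toy run of the uncolored (Briggs) condition with plain adjacency sets, ignoring the EdgeWeightWith pair weighting used above (illustrative only):

    #include <cstddef>
    #include <set>

    // Simplified Briggs test: merging a and b is safe if the union of their
    // neighbors contains fewer than k nodes of degree >= k. Low-degree neighbors
    // are ignored because they will be simplified away regardless.
    bool BriggsSafe(const std::set<int>& adj_a,
                    const std::set<int>& adj_b,
                    const std::set<int>& high_degree,  // ids of nodes with degree >= k
                    size_t k) {
      std::set<int> significant;
      for (int n : adj_a) {
        if (high_degree.count(n) != 0) significant.insert(n);
      }
      for (int n : adj_b) {
        if (high_degree.count(n) != 0) significant.insert(n);
      }
      return significant.size() < k;
    }

    // Example with k = 2: adj_a = {1, 2}, adj_b = {2, 3}, and only node 2 is high
    // degree. One significant neighbor < 2, so the merge is accepted; the merged
    // node keeps its colorable guarantee. If both 1 and 3 were also high degree,
    // the merge would be rejected even though it might have worked out.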
+ for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { + if (opportunity->stage != CoalesceStage::kDefunct) { + into->AddCoalesceOpportunity(opportunity); + } + } + EnableCoalesceOpportunities(from); + + // Prune and update worklists. + PruneNode(from, num_regs); + if (IsLowDegreeNode(into, num_regs)) { + // Coalesce(...) takes care of checking for a transition to the simplify worklist. + DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); + } else if (into->stage == NodeStage::kFreezeWorklist) { + // This is a transition to a high degree node. + into->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(into); + } else { + DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); + } +} + +void RegisterAllocatorGraphColor::Coalesce(CoalesceOpportunity* opportunity, + size_t num_regs) { + InterferenceNode* from = opportunity->node_a->GetAlias(); + InterferenceNode* into = opportunity->node_b->GetAlias(); + DCHECK_NE(from->stage, NodeStage::kPruned); + DCHECK_NE(into->stage, NodeStage::kPruned); + + if (from->IsPrecolored()) { + // If we have one pre-colored node, make sure it's the `into` node. + std::swap(from, into); + } + + if (from == into) { + // These nodes have already been coalesced. + opportunity->stage = CoalesceStage::kDefunct; + CheckTransitionFromFreezeWorklist(from, num_regs); + } else if (from->IsPrecolored() || from->ContainsInterference(into)) { + // These nodes interfere. + opportunity->stage = CoalesceStage::kDefunct; + CheckTransitionFromFreezeWorklist(from, num_regs); + CheckTransitionFromFreezeWorklist(into, num_regs); + } else if (PrecoloredHeuristic(from, into, num_regs) + || UncoloredHeuristic(from, into, num_regs)) { + // We can coalesce these nodes. + opportunity->stage = CoalesceStage::kDefunct; + Combine(from, into, num_regs); + CheckTransitionFromFreezeWorklist(into, num_regs); + } else { + // We cannot coalesce, but we may be able to later. + opportunity->stage = CoalesceStage::kActive; + } +} + // Build a mask with a bit set for each register assigned to some // interval in `intervals`. template <typename Container> @@ -888,32 +1691,113 @@ static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) { return conflict_mask; } -bool RegisterAllocatorGraphColor::ColorInterferenceGraph( - ArenaStdStack<InterferenceNode*>* pruned_nodes, - size_t num_regs) { +bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { + return processing_core_regs + ? !codegen_->IsCoreCalleeSaveRegister(reg) + : !codegen_->IsCoreCalleeSaveRegister(reg); +} + +static bool RegisterIsAligned(size_t reg) { + return reg % 2 == 0; +} + +static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { + // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. + // Note that CTZ is undefined if all bits are 0, so we special-case it. + return conflict_mask.all() ? 
conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); +} + +bool RegisterAllocatorGraphColor::ColorInterferenceGraph(size_t num_regs, + bool processing_core_regs) { DCHECK_LE(num_regs, kMaxNumRegs) << "kMaxNumRegs is too small"; ArenaVector<LiveInterval*> colored_intervals( coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); bool successful = true; - while (!pruned_nodes->empty()) { - InterferenceNode* node = pruned_nodes->top(); - pruned_nodes->pop(); + while (!pruned_nodes_.empty()) { + InterferenceNode* node = pruned_nodes_.top(); + pruned_nodes_.pop(); LiveInterval* interval = node->GetInterval(); - - // Search for free register(s). - // Note that the graph coloring allocator assumes that pair intervals are aligned here, - // excluding pre-colored pair intervals (which can currently be unaligned on x86). - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); size_t reg = 0; - if (interval->HasHighInterval()) { - while (reg < num_regs - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; + + InterferenceNode* alias = node->GetAlias(); + if (alias != node) { + // This node was coalesced with another. + LiveInterval* alias_interval = alias->GetInterval(); + if (alias_interval->HasRegister()) { + reg = alias_interval->GetRegister(); + DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) + << "This node conflicts with the register it was coalesced with"; + } else { + DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " + << "Move coalescing was not conservative, causing a node to be coalesced " + << "with another node that could not be colored"; + if (interval->RequiresRegister()) { + successful = false; + } } } else { - // We use CTZ (count trailing zeros) to quickly find the lowest available register. - // Note that CTZ is undefined for 0, so we special-case it. - reg = conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); + // Search for free register(s). + std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); + if (interval->HasHighInterval()) { + // Note that the graph coloring allocator assumes that pair intervals are aligned here, + // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we + // change the alignment requirements here, we will have to update the algorithm (e.g., + // be more conservative about the weight of edges adjacent to pair nodes.) + while (reg < num_regs - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { + reg += 2; + } + + // Try to use a caller-save register first. + for (size_t i = 0; i < num_regs - 1; i += 2) { + bool low_caller_save = IsCallerSave(i, processing_core_regs); + bool high_caller_save = IsCallerSave(i + 1, processing_core_regs); + if (!conflict_mask[i] && !conflict_mask[i + 1]) { + if (low_caller_save && high_caller_save) { + reg = i; + break; + } else if (low_caller_save || high_caller_save) { + reg = i; + // Keep looking to try to get both parts in caller-save registers. + } + } + } + } else { + // Not a pair interval. + reg = FindFirstZeroInConflictMask(conflict_mask); + + // Try to use caller-save registers first. + for (size_t i = 0; i < num_regs; ++i) { + if (!conflict_mask[i] && IsCallerSave(i, processing_core_regs)) { + reg = i; + break; + } + } + } + + // Last-chance coalescing. 
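The loop following this aside tries one more time to reuse the register of an already-colored coalesce partner before settling on the choice above. The caller-save preference in the search above goes through IsCallerSave(), whose floating-point branch as written queries the core callee-save mask for both register kinds; a minimal sketch of the check as presumably intended, assuming the CodeGenerator interface exposes IsFloatingPointCalleeSaveRegister alongside the core variant:

    // Hedged sketch only, not the body above: split the callee-save query by
    // register kind. Preferring caller-save registers keeps values out of
    // callee-save registers that the prologue would otherwise have to preserve.
    bool IsCallerSave(size_t reg, bool processing_core_regs, const CodeGenerator* codegen) {
      return processing_core_regs
          ? !codegen->IsCoreCalleeSaveRegister(reg)
          : !codegen->IsFloatingPointCalleeSaveRegister(reg);
    }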
+ for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + continue; + } + LiveInterval* other_interval = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias()->GetInterval() + : opportunity->node_a->GetAlias()->GetInterval(); + if (other_interval->HasRegister()) { + size_t coalesce_register = other_interval->GetRegister(); + if (interval->HasHighInterval()) { + if (!conflict_mask[coalesce_register] && + !conflict_mask[coalesce_register + 1] && + RegisterIsAligned(coalesce_register)) { + reg = coalesce_register; + break; + } + } else if (!conflict_mask[coalesce_register]) { + reg = coalesce_register; + break; + } + } + } } if (reg < (interval->HasHighInterval() ? num_regs - 1 : num_regs)) { diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index 0b5af96b40..4052766101 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -34,6 +34,8 @@ class HParallelMove; class Location; class SsaLivenessAnalysis; class InterferenceNode; +struct CoalesceOpportunity; +enum class CoalesceKind; /** * A graph coloring register allocator. @@ -60,6 +62,25 @@ class InterferenceNode; * sparser, so that future coloring attempts may succeed. * - If the node does not require a register, we simply assign it a location on the stack. * + * If iterative move coalescing is enabled, the algorithm also attempts to conservatively + * combine nodes in the graph that would prefer to have the same color. (For example, the output + * of a phi instruction would prefer to have the same register as at least one of its inputs.) + * There are several additional steps involved with this: + * - We look for coalesce opportunities by examining each live interval, a step similar to that + * used by linear scan when looking for register hints. + * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist + * of low degree nodes that have associated coalesce opportunities. Only when we run out of + * coalesce opportunities do we start pruning coalesce-associated nodes. + * - When pruning a node, if any nodes transition from high degree to low degree, we add + * associated coalesce opportunities to the worklist, since these opportunities may now succeed. + * - Whether two nodes can be combined is decided by two different heuristics--one used when + * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. + * It is vital that we only combine two nodes if the node that remains is guaranteed to receive + * a color. This is because additionally spilling is more costly than failing to coalesce. + * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around + * to be used as last-chance register hints when coloring. If nothing else, we try to use + * caller-save registers before callee-save registers. + * * A good reference for graph coloring register allocation is * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). 
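Taken together, the steps described in this comment amount to one coloring attempt per register kind. A hedged sketch of that flow using only the methods declared below; the driver that actually strings them together is not part of this hunk, and the argument names follow the declarations:

    BuildInterferenceGraph(intervals, physical_nodes, &safepoints);
    FindCoalesceOpportunities();  // only when iterative_move_coalescing_ is enabled
    PruneInterferenceGraph(num_registers);
    if (!ColorInterferenceGraph(num_registers, processing_core_regs)) {
      // Some intervals were split to make the graph sparser: rebuild the graph
      // and retry, with a fresh coloring_attempt_allocator_ for the new attempt.
    }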
*/ @@ -67,7 +88,8 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { public: RegisterAllocatorGraphColor(ArenaAllocator* allocator, CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis); + const SsaLivenessAnalysis& analysis, + bool iterative_move_coalescing = true); ~RegisterAllocatorGraphColor() OVERRIDE {} void AllocateRegisters() OVERRIDE; @@ -116,12 +138,20 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { void BlockRegister(Location location, size_t start, size_t end); void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); + static bool HasGreaterNodePriority(const InterferenceNode* lhs, const InterferenceNode* rhs); + + // Compare two coalesce opportunities based on their priority. + // Return true if lhs has a lower priority than that of rhs. + static bool CmpCoalesceOpportunity(const CoalesceOpportunity* lhs, + const CoalesceOpportunity* rhs); + // Use the intervals collected from instructions to construct an // interference graph mapping intervals to adjacency lists. // Also, collect synthesized safepoint nodes, used to keep // track of live intervals across safepoints. + // TODO: Should build safepoints elsewhere. void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals, - ArenaVector<InterferenceNode*>* prunable_nodes, + const ArenaVector<InterferenceNode*>& physical_nodes, ArenaVector<InterferenceNode*>* safepoints); // Prune nodes from the interference graph to be colored later. Build @@ -131,11 +161,61 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { size_t num_registers, ArenaStdStack<InterferenceNode*>* pruned_nodes); - // Process pruned_intervals to color the interference graph, spilling when - // necessary. Return true if successful. Else, split some intervals to make - // the interference graph sparser. - bool ColorInterferenceGraph(ArenaStdStack<InterferenceNode*>* pruned_nodes, - size_t num_registers); + // Add an edge in the interference graph, if valid. + // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion + // when possible. + void AddPotentialInterference(InterferenceNode* from, + InterferenceNode* to, + bool guaranteed_not_interfering_yet, + bool both_directions = true); + + // Create a coalesce opportunity between two nodes. + void CreateCoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position); + + // Add coalesce opportunities to interference nodes. + void FindCoalesceOpportunities(); + + // Prune nodes from the interference graph to be colored later. Returns + // a stack containing these intervals in an order determined by various + // heuristics. + // Also performs iterative conservative coalescing, based on Modern Compiler Implementation + // in Java, 2nd ed. (Andrew Appel, Cambridge University Press.) + void PruneInterferenceGraph(size_t num_registers); + + // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) + // may be pruned from the interference graph. + void FreezeMoves(InterferenceNode* node, size_t num_regs); + + // Prune a node from the interference graph, updating worklists if necessary. + void PruneNode(InterferenceNode* node, size_t num_regs); + + // Add coalesce opportunities associated with this node to the coalesce worklist. + void EnableCoalesceOpportunities(InterferenceNode* node); + + // If needed, from `node` from the freeze worklist to the simplify worklist. 
+ void CheckTransitionFromFreezeWorklist(InterferenceNode* node, size_t num_regs); + + // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. + bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into, size_t num_regs); + + // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. + bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into, size_t num_regs); + + void Coalesce(CoalesceOpportunity* opportunity, size_t num_regs); + + // Merge `from` into `into` in the interference graph. + void Combine(InterferenceNode* from, InterferenceNode* into, size_t num_regs); + + bool IsCallerSave(size_t reg, bool processing_core_regs); + + // Process pruned_intervals_ to color the interference graph, spilling when + // necessary. Returns true if successful. Else, some intervals have been + // split, and the interference graph should be rebuilt for another attempt. + bool ColorInterferenceGraph(size_t num_registers, + bool processing_core_regs); // Return the maximum number of registers live at safepoints, // based on the outgoing interference edges of safepoint nodes. @@ -145,6 +225,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // and make sure it's ready to be spilled to the stack. void AllocateSpillSlotFor(LiveInterval* interval); + // Whether iterative move coalescing should be performed. Iterative move coalescing + // improves code quality, but increases compile time. + const bool iterative_move_coalescing_; + // Live intervals, split by kind (core and floating point). // These should not contain high intervals, as those are represented by // the corresponding low interval throughout register allocation. @@ -157,10 +241,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // Safepoints, saved for special handling while processing instructions. ArenaVector<HInstruction*> safepoints_; - // Live intervals for specific registers. These become pre-colored nodes + // Interference nodes representing specific registers. These are "pre-colored" nodes // in the interference graph. - ArenaVector<LiveInterval*> physical_core_intervals_; - ArenaVector<LiveInterval*> physical_fp_intervals_; + ArenaVector<InterferenceNode*> physical_core_nodes_; + ArenaVector<InterferenceNode*> physical_fp_nodes_; // Allocated stack slot counters. size_t int_spill_slot_counter_; @@ -189,6 +273,36 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // total memory usage by using a new arena allocator for each attempt. ArenaAllocator* coloring_attempt_allocator_; + // A monotonically increasing counter for assigning unique IDs to interference nodes. + // Unique IDs are used to maintain determinism when storing interference nodes in certain + // data structure, such as sets. + size_t node_id_counter_; + + // A map from live intervals to interference nodes. + ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; + + // Uncolored nodes that should be pruned from the interference graph. + ArenaVector<InterferenceNode*> prunable_nodes_; + + // A stack of nodes pruned from the interference graph, waiting to be pruned. + ArenaStdStack<InterferenceNode*> pruned_nodes_; + + // A queue containing low degree, non-move-related nodes that can pruned immediately. + ArenaDeque<InterferenceNode*> simplify_worklist_; + + // A queue containing low degree, move-related nodes. + ArenaDeque<InterferenceNode*> freeze_worklist_; + + // A queue containing high degree nodes. 
+ // If we have to prune from the spill worklist, we cannot guarantee + // the pruned node a color, so we order the worklist by priority. + ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; + + // A queue containing coalesce opportunities. + // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those + // inside of loops) are more important than others. + ArenaPriorityQueue<CoalesceOpportunity*, decltype(&CmpCoalesceOpportunity)> coalesce_worklist_; + DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 346753b775..92788fe6b8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -514,7 +514,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Whether the interval requires a register rather than a stack location. // If needed for performance, this could be cached. - bool RequiresRegister() const { return FirstRegisterUse() != kNoLifetime; } + bool RequiresRegister() const { + return !HasRegister() && FirstRegisterUse() != kNoLifetime; + } size_t FirstUseAfter(size_t position) const { if (is_temp_) { diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index aadc43f921..d5cd59d481 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -376,499 +376,6 @@ void ArmAssembler::Pad(uint32_t bytes) { } } -static dwarf::Reg DWARFReg(Register reg) { - return dwarf::Reg::ArmCore(static_cast<int>(reg)); -} - -static dwarf::Reg DWARFReg(SRegister reg) { - return dwarf::Reg::ArmFp(static_cast<int>(reg)); -} - -constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize); - -void ArmAssembler::BuildFrame(size_t frame_size, - ManagedRegister method_reg, - ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) { - CHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet - CHECK_ALIGNED(frame_size, kStackAlignment); - CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); - - // Push callee saves and link register. - RegList core_spill_mask = 1 << LR; - uint32_t fp_spill_mask = 0; - for (const ManagedRegister& reg : callee_save_regs) { - if (reg.AsArm().IsCoreRegister()) { - core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); - } else { - fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); - } - } - PushList(core_spill_mask); - cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); - cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); - if (fp_spill_mask != 0) { - vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); - cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); - cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); - } - - // Increase frame to required size. - int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); - CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. - IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. - - // Write out Method*. - StoreToOffset(kStoreWord, R0, SP, 0); - - // Write out entry spills. 
- int32_t offset = frame_size + kFramePointerSize; - for (size_t i = 0; i < entry_spills.size(); ++i) { - ArmManagedRegister reg = entry_spills.at(i).AsArm(); - if (reg.IsNoRegister()) { - // only increment stack offset. - ManagedRegisterSpill spill = entry_spills.at(i); - offset += spill.getSize(); - } else if (reg.IsCoreRegister()) { - StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset); - offset += 4; - } else if (reg.IsSRegister()) { - StoreSToOffset(reg.AsSRegister(), SP, offset); - offset += 4; - } else if (reg.IsDRegister()) { - StoreDToOffset(reg.AsDRegister(), SP, offset); - offset += 8; - } - } -} - -void ArmAssembler::RemoveFrame(size_t frame_size, - ArrayRef<const ManagedRegister> callee_save_regs) { - CHECK_ALIGNED(frame_size, kStackAlignment); - cfi_.RememberState(); - - // Compute callee saves to pop and PC. - RegList core_spill_mask = 1 << PC; - uint32_t fp_spill_mask = 0; - for (const ManagedRegister& reg : callee_save_regs) { - if (reg.AsArm().IsCoreRegister()) { - core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); - } else { - fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); - } - } - - // Decrease frame to start of callee saves. - int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); - CHECK_GT(frame_size, pop_values * kFramePointerSize); - DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. - - if (fp_spill_mask != 0) { - vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); - cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); - cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); - } - - // Pop callee saves and PC. - PopList(core_spill_mask); - - // The CFI should be restored for any code that follows the exit block. - cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - -void ArmAssembler::IncreaseFrameSize(size_t adjust) { - AddConstant(SP, -adjust); - cfi_.AdjustCFAOffset(adjust); -} - -void ArmAssembler::DecreaseFrameSize(size_t adjust) { - AddConstant(SP, adjust); - cfi_.AdjustCFAOffset(-adjust); -} - -void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { - ArmManagedRegister src = msrc.AsArm(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsCoreRegister()) { - CHECK_EQ(4u, size); - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); - } else if (src.IsRegisterPair()) { - CHECK_EQ(8u, size); - StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value()); - StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), - SP, dest.Int32Value() + 4); - } else if (src.IsSRegister()) { - StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value()); - } else { - CHECK(src.IsDRegister()) << src; - StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value()); - } -} - -void ArmAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc, - FrameOffset in_off, ManagedRegister mscratch) { - ArmManagedRegister src = msrc.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); - 
LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); -} - -void ArmAssembler::CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), offs.Int32Value()); - if (unpoison_reference) { - MaybeUnpoisonHeapReference(dst.AsCoreRegister()); - } -} - -void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value()); -} - -void ArmAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), offs.Int32Value()); -} - -void ArmAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadImmediate(scratch.AsCoreRegister(), imm); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); -} - -static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst, - Register src_register, int32_t src_offset, size_t size) { - ArmManagedRegister dst = m_dst.AsArm(); - if (dst.IsNoRegister()) { - CHECK_EQ(0u, size) << dst; - } else if (dst.IsCoreRegister()) { - CHECK_EQ(4u, size) << dst; - assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset); - } else if (dst.IsRegisterPair()) { - CHECK_EQ(8u, size) << dst; - assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset); - assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4); - } else if (dst.IsSRegister()) { - assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset); - } else { - CHECK(dst.IsDRegister()) << dst; - assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset); - } -} - -void ArmAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { - return EmitLoad(this, m_dst, SP, src.Int32Value(), size); -} - -void ArmAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) { - return EmitLoad(this, m_dst, TR, src.Int32Value(), size); -} - -void ArmAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) { - ArmManagedRegister dst = m_dst.AsArm(); - CHECK(dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value()); -} - -void ArmAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, - ThreadOffset32 thr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - SP, fr_offs.Int32Value()); -} 
- -void ArmAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - SP, fr_offs.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); -} - -void ArmAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); -} - -void ArmAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { - StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value()); -} - -void ArmAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; -} - -void ArmAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; -} - -void ArmAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) { - ArmManagedRegister dst = m_dst.AsArm(); - ArmManagedRegister src = m_src.AsArm(); - if (!dst.Equals(src)) { - if (dst.IsCoreRegister()) { - CHECK(src.IsCoreRegister()) << src; - mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister())); - } else if (dst.IsDRegister()) { - CHECK(src.IsDRegister()) << src; - vmovd(dst.AsDRegister(), src.AsDRegister()); - } else if (dst.IsSRegister()) { - CHECK(src.IsSRegister()) << src; - vmovs(dst.AsSRegister(), src.AsSRegister()); - } else { - CHECK(dst.IsRegisterPair()) << dst; - CHECK(src.IsRegisterPair()) << src; - // Ensure that the first move doesn't clobber the input of the second. 
- if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { - mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); - mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); - } else { - mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); - mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); - } - } - } -} - -void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); - } -} - -void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsArm().AsCoreRegister(); - CHECK_EQ(size, 4u); - LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value()); - StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value()); -} - -void ArmAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsArm().AsCoreRegister(); - CHECK_EQ(size, 4u); - LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch, dest_base.AsArm().AsCoreRegister(), dest_offset.Int32Value()); -} - -void ArmAssembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister mscratch, size_t size) { - CHECK_EQ(size, 4u); - Register scratch = mscratch.AsArm().AsCoreRegister(); - LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value()); - StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value()); -} - -void ArmAssembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, - FrameOffset handle_scope_offset, - ManagedRegister min_reg, bool null_allowed) { - ArmManagedRegister out_reg = mout_reg.AsArm(); - ArmManagedRegister in_reg = min_reg.AsArm(); - CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; - CHECK(out_reg.IsCoreRegister()) << out_reg; - if (null_allowed) { - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) - if (in_reg.IsNoRegister()) { - LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), - SP, handle_scope_offset.Int32Value()); - in_reg = out_reg; - } - cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); - if (!out_reg.Equals(in_reg)) { - it(EQ, kItElse); - LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); - } else { - it(NE); - } - AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); - } else { - AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); - } -} - -void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handle_scope_offset, - ManagedRegister mscratch, - bool null_allowed) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - if (null_allowed) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, - handle_scope_offset.Int32Value()); - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - cmp(scratch.AsCoreRegister(), ShifterOperand(0)); - it(NE); - AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); - } else { - AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); - } - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value()); -} - -void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, - ManagedRegister min_reg) { - ArmManagedRegister out_reg = mout_reg.AsArm(); - ArmManagedRegister in_reg = min_reg.AsArm(); - CHECK(out_reg.IsCoreRegister()) << out_reg; - CHECK(in_reg.IsCoreRegister()) << in_reg; - Label null_arg; - if (!out_reg.Equals(in_reg)) { - LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); // TODO: why EQ? - } - cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); - it(NE); - LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), - in_reg.AsCoreRegister(), 0, NE); -} - -void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void ArmAssembler::Call(ManagedRegister mbase, Offset offset, - ManagedRegister mscratch) { - ArmManagedRegister base = mbase.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(base.IsCoreRegister()) << base; - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - base.AsCoreRegister(), offset.Int32Value()); - blx(scratch.AsCoreRegister()); - // TODO: place reference map on call. 
-} - -void ArmAssembler::Call(FrameOffset base, Offset offset, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - // Call *(*(SP + base) + offset) - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - SP, base.Int32Value()); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - scratch.AsCoreRegister(), offset.Int32Value()); - blx(scratch.AsCoreRegister()); - // TODO: place reference map on call -} - -void ArmAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::GetCurrentThread(ManagedRegister tr) { - mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR)); -} - -void ArmAssembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*scratch*/) { - StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL); -} - -void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { - ArmManagedRegister scratch = mscratch.AsArm(); - ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadWord, - scratch.AsCoreRegister(), - TR, - Thread::ExceptionOffset<kArmPointerSize>().Int32Value()); - cmp(scratch.AsCoreRegister(), ShifterOperand(0)); - b(slow->Entry(), NE); -} - -void ArmExceptionSlowPath::Emit(Assembler* sasm) { - ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); - } - // Pass exception object as argument. - // Don't care about preserving R0 as this call won't return. - __ mov(R0, ShifterOperand(scratch_.AsCoreRegister())); - // Set up call to Thread::Current()->pDeliverException. - __ LoadFromOffset(kLoadWord, - R12, - TR, - QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()); - __ blx(R12); -#undef __ -} - - static int LeadingZeros(uint32_t val) { uint32_t alt; int32_t n; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index bb88e6fdf4..ff0bbafb9a 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -435,19 +435,10 @@ extern const char* kConditionNames[]; // This is an abstract ARM assembler. Subclasses provide assemblers for the individual // instruction sets (ARM32, Thumb2, etc.) // -class ArmAssembler : public Assembler, public JNIMacroAssembler<PointerSize::k32> { +class ArmAssembler : public Assembler { public: virtual ~ArmAssembler() {} - size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } - DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } - void FinalizeCode() OVERRIDE { - Assembler::FinalizeCode(); - } - void FinalizeInstructions(const MemoryRegion& region) { - Assembler::FinalizeInstructions(region); - } - // Is this assembler for the thumb instruction set? 
virtual bool IsThumb() const = 0; @@ -891,121 +882,6 @@ class ArmAssembler : public Assembler, public JNIMacroAssembler<PointerSize::k32 virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; - // - // Overridden common assembler high-level functionality - // - - // Emit code that will create an activation on the stack - void BuildFrame(size_t frame_size, - ManagedRegister method_reg, - ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack - void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - - void StoreStackOffsetToThread(ThreadOffset32 thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - - void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - - void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE; - - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; - - // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; - - void CopyRawPtrFromThread(FrameOffset fr_offs, - ThreadOffset32 thr_offs, - ManagedRegister scratch) OVERRIDE; - - void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current() - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister 
scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; - - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - static uint32_t ModifiedImmediate(uint32_t value); static bool IsLowRegister(Register r) { @@ -1083,18 +959,6 @@ class ArmAssembler : public Assembler, public JNIMacroAssembler<PointerSize::k32 ArenaVector<Label*> tracked_labels_; }; -// Slowpath entered when Thread::Current()->_exception is non-null -class ArmExceptionSlowPath FINAL : public SlowPath { - public: - ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) { - } - void Emit(Assembler *sp_asm) OVERRIDE; - private: - const ArmManagedRegister scratch_; - const size_t stack_adjust_; -}; - } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index c95dfa8066..6f9d5f32af 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1664,12 +1664,6 @@ void Arm32Assembler::StoreDToOffset(DRegister reg, } -void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) { - CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); - dmb(SY); -} - - void Arm32Assembler::dmb(DmbOptions flavor) { int32_t encoding = 0xf57ff05f; // dmb Emit(encoding | flavor); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 554dd2350b..044eaa1edf 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -316,8 +316,6 @@ class Arm32Assembler FINAL : public ArmAssembler { void Emit(int32_t value); void Bind(Label* label) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 4be7aae243..ee69698ce8 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ 
b/compiler/utils/arm/assembler_thumb2.cc @@ -3863,12 +3863,6 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, } -void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) { - CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); - dmb(SY); -} - - void Thumb2Assembler::dmb(DmbOptions flavor) { int32_t encoding = 0xf3bf8f50; // dmb in T1 encoding. Emit32(encoding | flavor); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 4ee23c0e27..1c1c98b52b 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -368,8 +368,6 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Emit16(int16_t value); // Emit a 16 bit instruction in little endian format. void Bind(Label* label) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - // Force the assembler to generate 32 bit instructions. void Force32Bit() { force_32bit_ = true; diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc new file mode 100644 index 0000000000..c03981653e --- /dev/null +++ b/compiler/utils/arm/jni_macro_assembler_arm.cc @@ -0,0 +1,612 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "jni_macro_assembler_arm.h" + +#include <algorithm> + +#include "assembler_arm32.h" +#include "assembler_thumb2.h" +#include "base/arena_allocator.h" +#include "base/bit_utils.h" +#include "base/logging.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "offsets.h" +#include "thread.h" + +namespace art { +namespace arm { + +constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize); + +// Slowpath entered when Thread::Current()->_exception is non-null +class ArmExceptionSlowPath FINAL : public SlowPath { + public: + ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) { + } + void Emit(Assembler *sp_asm) OVERRIDE; + private: + const ArmManagedRegister scratch_; + const size_t stack_adjust_; +}; + +ArmJNIMacroAssembler::ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa) { + switch (isa) { + case kArm: + asm_.reset(new (arena) Arm32Assembler(arena)); + break; + + case kThumb2: + asm_.reset(new (arena) Thumb2Assembler(arena)); + break; + + default: + LOG(FATAL) << isa; + UNREACHABLE(); + } +} + +ArmJNIMacroAssembler::~ArmJNIMacroAssembler() { +} + +size_t ArmJNIMacroAssembler::CodeSize() const { + return asm_->CodeSize(); +} + +DebugFrameOpCodeWriterForAssembler& ArmJNIMacroAssembler::cfi() { + return asm_->cfi(); +} + +void ArmJNIMacroAssembler::FinalizeCode() { + asm_->FinalizeCode(); +} + +void ArmJNIMacroAssembler::FinalizeInstructions(const MemoryRegion& region) { + asm_->FinalizeInstructions(region); +} + +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + +#define __ asm_-> + +void ArmJNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + CHECK_EQ(CodeSize(), 0U); // Nothing emitted yet + CHECK_ALIGNED(frame_size, kStackAlignment); + CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); + + // Push callee saves and link register. + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + __ PushList(core_spill_mask); + cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + __ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); + } + + // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. + + // Write out Method*. + __ StoreToOffset(kStoreWord, R0, SP, 0); + + // Write out entry spills. + int32_t offset = frame_size + kFramePointerSize; + for (size_t i = 0; i < entry_spills.size(); ++i) { + ArmManagedRegister reg = entry_spills.at(i).AsArm(); + if (reg.IsNoRegister()) { + // only increment stack offset. 
+ ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsCoreRegister()) { + __ StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset); + offset += 4; + } else if (reg.IsSRegister()) { + __ StoreSToOffset(reg.AsSRegister(), SP, offset); + offset += 4; + } else if (reg.IsDRegister()) { + __ StoreDToOffset(reg.AsDRegister(), SP, offset); + offset += 8; + } + } +} + +void ArmJNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + cfi().RememberState(); + + // Compute callee saves to pop and PC. + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + + // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + CHECK_GT(frame_size, pop_values * kFramePointerSize); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. + + if (fp_spill_mask != 0) { + __ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); + cfi().RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); + } + + // Pop callee saves and PC. + __ PopList(core_spill_mask); + + // The CFI should be restored for any code that follows the exit block. + cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +void ArmJNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + __ AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); +} + +static void DecreaseFrameSizeImpl(ArmAssembler* assembler, size_t adjust) { + assembler->AddConstant(SP, adjust); + assembler->cfi().AdjustCFAOffset(-adjust); +} + +void ArmJNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + DecreaseFrameSizeImpl(asm_.get(), adjust); +} + +void ArmJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { + ArmManagedRegister src = msrc.AsArm(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsCoreRegister()) { + CHECK_EQ(4u, size); + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); + } else if (src.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value()); + __ StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), SP, dest.Int32Value() + 4); + } else if (src.IsSRegister()) { + __ StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value()); + } else { + CHECK(src.IsDRegister()) << src; + __ StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value()); + } +} + +void ArmJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreSpanning(FrameOffset dest, + ManagedRegister msrc, + FrameOffset in_off, + ManagedRegister mscratch) { + ArmManagedRegister src = msrc.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + __ StoreToOffset(kStoreWord, 
src.AsCoreRegister(), SP, dest.Int32Value()); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + sizeof(uint32_t)); +} + +void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) { + ArmManagedRegister dst = mdest.AsArm(); + CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, + dst.AsCoreRegister(), + base.AsArm().AsCoreRegister(), + offs.Int32Value()); + if (unpoison_reference) { + __ MaybeUnpoisonHeapReference(dst.AsCoreRegister()); + } +} + +void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { + ArmManagedRegister dst = mdest.AsArm(); + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, + Offset offs) { + ArmManagedRegister dst = mdest.AsArm(); + CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, + dst.AsCoreRegister(), + base.AsArm().AsCoreRegister(), + offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, + uint32_t imm, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadImmediate(scratch.AsCoreRegister(), imm); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); +} + +static void EmitLoad(ArmAssembler* assembler, + ManagedRegister m_dst, + Register src_register, + int32_t src_offset, + size_t size) { + ArmManagedRegister dst = m_dst.AsArm(); + if (dst.IsNoRegister()) { + CHECK_EQ(0u, size) << dst; + } else if (dst.IsCoreRegister()) { + CHECK_EQ(4u, size) << dst; + assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset); + } else if (dst.IsRegisterPair()) { + CHECK_EQ(8u, size) << dst; + assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset); + assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4); + } else if (dst.IsSRegister()) { + assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset); + } else { + CHECK(dst.IsDRegister()) << dst; + assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset); + } +} + +void ArmJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { + EmitLoad(asm_.get(), m_dst, SP, src.Int32Value(), size); +} + +void ArmJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) { + EmitLoad(asm_.get(), m_dst, TR, src.Int32Value(), size); +} + +void ArmJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) { + ArmManagedRegister dst = m_dst.AsArm(); + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value()); +} + +void ArmJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + 
CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { + __ StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; +} + +void ArmJNIMacroAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; +} + +void ArmJNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) { + ArmManagedRegister dst = m_dst.AsArm(); + ArmManagedRegister src = m_src.AsArm(); + if (!dst.Equals(src)) { + if (dst.IsCoreRegister()) { + CHECK(src.IsCoreRegister()) << src; + __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister())); + } else if (dst.IsDRegister()) { + CHECK(src.IsDRegister()) << src; + __ vmovd(dst.AsDRegister(), src.AsDRegister()); + } else if (dst.IsSRegister()) { + CHECK(src.IsSRegister()) << src; + __ vmovs(dst.AsSRegister(), src.AsSRegister()); + } else { + CHECK(dst.IsRegisterPair()) << dst; + CHECK(src.IsRegisterPair()) << src; + // Ensure that the first move doesn't clobber the input of the second. 
+ if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { + __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); + __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); + } else { + __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); + __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); + } + } + } +} + +void ArmJNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister mscratch, + size_t size) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); + } +} + +void ArmJNIMacroAssembler::Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsArm().AsCoreRegister(); + CHECK_EQ(size, 4u); + __ LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value()); + __ StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsArm().AsCoreRegister(); + CHECK_EQ(size, 4u); + __ LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, + scratch, + dest_base.AsArm().AsCoreRegister(), + dest_offset.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/, + FrameOffset /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*mscratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + CHECK_EQ(size, 4u); + Register scratch = mscratch.AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value()); + __ StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/, + Offset /*dest_offset*/, + FrameOffset /*src*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { + ArmManagedRegister out_reg = mout_reg.AsArm(); + ArmManagedRegister in_reg = min_reg.AsArm(); + CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; + CHECK(out_reg.IsCoreRegister()) << out_reg; + if (null_allowed) { + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value()); + in_reg = out_reg; + } + __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); + if (!out_reg.Equals(in_reg)) { + __ it(EQ, kItElse); + __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); + } else { + __ it(NE); + } + __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); + } else { + __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); + } +} + +void ArmJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + if (null_allowed) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + __ cmp(scratch.AsCoreRegister(), ShifterOperand(0)); + __ it(NE); + __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); + } else { + __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); + } + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, + ManagedRegister min_reg) { + ArmManagedRegister out_reg = mout_reg.AsArm(); + ArmManagedRegister in_reg = min_reg.AsArm(); + CHECK(out_reg.IsCoreRegister()) << out_reg; + CHECK(in_reg.IsCoreRegister()) << in_reg; + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); // TODO: why EQ? + } + __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); + __ it(NE); + __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0, NE); +} + +void ArmJNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void ArmJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, + ManagedRegister mscratch) { + ArmManagedRegister base = mbase.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(base.IsCoreRegister()) << base; + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + base.AsCoreRegister(), + offset.Int32Value()); + __ blx(scratch.AsCoreRegister()); + // TODO: place reference map on call. 
+} + +void ArmJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + // Call *(*(SP + base) + offset) + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value()); + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + scratch.AsCoreRegister(), + offset.Int32Value()); + __ blx(scratch.AsCoreRegister()); + // TODO: place reference map on call +} + +void ArmJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + __ mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR)); +} + +void ArmJNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /*scratch*/) { + __ StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL); +} + +void ArmJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { + ArmManagedRegister scratch = mscratch.AsArm(); + ArmExceptionSlowPath* slow = new (__ GetArena()) ArmExceptionSlowPath(scratch, stack_adjust); + __ GetBuffer()->EnqueueSlowPath(slow); + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + TR, + Thread::ExceptionOffset<kArmPointerSize>().Int32Value()); + __ cmp(scratch.AsCoreRegister(), ShifterOperand(0)); + __ b(slow->Entry(), NE); +} + +#undef __ + +void ArmExceptionSlowPath::Emit(Assembler* sasm) { + ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm); +#define __ sp_asm-> + __ Bind(&entry_); + if (stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSizeImpl(sp_asm, stack_adjust_); + } + // Pass exception object as argument. + // Don't care about preserving R0 as this call won't return. + __ mov(R0, ShifterOperand(scratch_.AsCoreRegister())); + // Set up call to Thread::Current()->pDeliverException. + __ LoadFromOffset(kLoadWord, + R12, + TR, + QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()); + __ blx(R12); +#undef __ +} + +void ArmJNIMacroAssembler::MemoryBarrier(ManagedRegister mscratch) { + CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); + asm_->dmb(SY); +} + +} // namespace arm +} // namespace art diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h new file mode 100644 index 0000000000..4471906c27 --- /dev/null +++ b/compiler/utils/arm/jni_macro_assembler_arm.h @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ +#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ + +#include <memory> +#include <type_traits> +#include <vector> + +#include "arch/instruction_set.h" +#include "base/enums.h" +#include "base/macros.h" +#include "utils/jni_macro_assembler.h" +#include "offsets.h" + +namespace art { +namespace arm { + +class ArmAssembler; + +class ArmJNIMacroAssembler : public JNIMacroAssembler<PointerSize::k32> { + public: + ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa); + virtual ~ArmJNIMacroAssembler(); + + size_t CodeSize() const OVERRIDE; + DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE; + void FinalizeCode() OVERRIDE; + void FinalizeInstructions(const MemoryRegion& region) OVERRIDE; + + // + // Overridden common assembler high-level functionality + // + + // Emit code that will create an activation on the stack + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + + void StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + + void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + + void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE; + + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + + void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, + bool unpoison_reference) OVERRIDE; + + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + + // Copying routines + void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, + ManagedRegister scratch, size_t size) 
OVERRIDE; + + void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, + ManagedRegister scratch, size_t size) OVERRIDE; + + // Sign extension + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current() + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, + ManagedRegister in_reg, bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, + ManagedRegister scratch, bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset] + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + + private: + std::unique_ptr<ArmAssembler> asm_; +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 53685bfa53..22221e752a 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -32,9 +32,6 @@ namespace arm64 { #endif void Arm64Assembler::FinalizeCode() { - for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) { - EmitExceptionPoll(exception.get()); - } ___ FinalizeCode(); } @@ -52,254 +49,6 @@ void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { region.CopyFrom(0, from); } -void Arm64Assembler::GetCurrentThread(ManagedRegister tr) { - ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR)); -} - -void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) { - StoreToOffset(TR, SP, offset.Int32Value()); -} - -// See Arm64 PCS Section 5.2.2.1. -void Arm64Assembler::IncreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant(SP, -adjust); - cfi().AdjustCFAOffset(adjust); -} - -// See Arm64 PCS Section 5.2.2.1. 
-void Arm64Assembler::DecreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant(SP, adjust); - cfi().AdjustCFAOffset(-adjust); -} - -void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) { - AddConstant(rd, rd, value, cond); -} - -void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value, - Condition cond) { - if ((cond == al) || (cond == nv)) { - // VIXL macro-assembler handles all variants. - ___ Add(reg_x(rd), reg_x(rn), value); - } else { - // temp = rd + value - // rd = cond ? temp : rn - UseScratchRegisterScope temps(&vixl_masm_); - temps.Exclude(reg_x(rd), reg_x(rn)); - Register temp = temps.AcquireX(); - ___ Add(temp, reg_x(rn), value); - ___ Csel(reg_x(rd), temp, reg_x(rd), cond); - } -} - -void Arm64Assembler::StoreWToOffset(StoreOperandType type, WRegister source, - XRegister base, int32_t offset) { - switch (type) { - case kStoreByte: - ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - case kStoreHalfword: - ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - case kStoreWord: - ___ Str(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } -} - -void Arm64Assembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) { - CHECK_NE(source, SP); - ___ Str(reg_x(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) { - ___ Str(reg_s(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) { - ___ Str(reg_d(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { - Arm64ManagedRegister src = m_src.AsArm64(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsWRegister()) { - CHECK_EQ(4u, size); - StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value()); - } else if (src.IsXRegister()) { - CHECK_EQ(8u, size); - StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); - } else if (src.IsSRegister()) { - StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value()); - } else { - CHECK(src.IsDRegister()) << src; - StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value()); - } -} - -void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) { - Arm64ManagedRegister src = m_src.AsArm64(); - CHECK(src.IsXRegister()) << src; - StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP, - offs.Int32Value()); -} - -void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { - Arm64ManagedRegister src = m_src.AsArm64(); - CHECK(src.IsXRegister()) << src; - StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); -} - -void Arm64Assembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadImmediate(scratch.AsXRegister(), imm); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, - offs.Int32Value()); -} - -void Arm64Assembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, - FrameOffset fr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); -} - -void Arm64Assembler::StoreStackPointerToThread(ThreadOffset64 
tr_offs) { - UseScratchRegisterScope temps(&vixl_masm_); - Register temp = temps.AcquireX(); - ___ Mov(temp, reg_x(SP)); - ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); -} - -void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source, - FrameOffset in_off, ManagedRegister m_scratch) { - Arm64ManagedRegister source = m_source.AsArm64(); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value()); - LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8); -} - -// Load routines. -void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value, - Condition cond) { - if ((cond == al) || (cond == nv)) { - ___ Mov(reg_x(dest), value); - } else { - // temp = value - // rd = cond ? temp : rd - if (value != 0) { - UseScratchRegisterScope temps(&vixl_masm_); - temps.Exclude(reg_x(dest)); - Register temp = temps.AcquireX(); - ___ Mov(temp, value); - ___ Csel(reg_x(dest), temp, reg_x(dest), cond); - } else { - ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond); - } - } -} - -void Arm64Assembler::LoadWFromOffset(LoadOperandType type, WRegister dest, - XRegister base, int32_t offset) { - switch (type) { - case kLoadSignedByte: - ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadSignedHalfword: - ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadUnsignedByte: - ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadUnsignedHalfword: - ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadWord: - ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } -} - -// Note: We can extend this member by adding load type info - see -// sign extended A64 load variants. 
-void Arm64Assembler::LoadFromOffset(XRegister dest, XRegister base, - int32_t offset) { - CHECK_NE(dest, SP); - ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::LoadSFromOffset(SRegister dest, XRegister base, - int32_t offset) { - ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::LoadDFromOffset(DRegister dest, XRegister base, - int32_t offset) { - ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::Load(Arm64ManagedRegister dest, XRegister base, - int32_t offset, size_t size) { - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size) << dest; - } else if (dest.IsWRegister()) { - CHECK_EQ(4u, size) << dest; - ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset)); - } else if (dest.IsXRegister()) { - CHECK_NE(dest.AsXRegister(), SP) << dest; - if (size == 4u) { - ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset)); - } else { - CHECK_EQ(8u, size) << dest; - ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset)); - } - } else if (dest.IsSRegister()) { - ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset)); - } else { - CHECK(dest.IsDRegister()) << dest; - ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset)); - } -} - -void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { - return Load(m_dst.AsArm64(), SP, src.Int32Value(), size); -} - -void Arm64Assembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset64 src, size_t size) { - return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); -} - -void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - CHECK(dst.IsXRegister()) << dst; - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value()); -} - -void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs, - bool unpoison_reference) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister base = m_base.AsArm64(); - CHECK(dst.IsXRegister() && base.IsXRegister()); - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(), - offs.Int32Value()); - if (unpoison_reference) { - WRegister ref_reg = dst.AsOverlappingWRegister(); - MaybeUnpoisonHeapReference(reg_w(ref_reg)); - } -} - void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) { Arm64ManagedRegister dst = m_dst.AsArm64(); Arm64ManagedRegister base = m_base.AsArm64(); @@ -310,209 +59,6 @@ void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, O ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); } -void Arm64Assembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - CHECK(dst.IsXRegister()) << dst; - LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value()); -} - -// Copying routines. 
-void Arm64Assembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister src = m_src.AsArm64(); - if (!dst.Equals(src)) { - if (dst.IsXRegister()) { - if (size == 4) { - CHECK(src.IsWRegister()); - ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister())); - } else { - if (src.IsXRegister()) { - ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister())); - } else { - ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister())); - } - } - } else if (dst.IsWRegister()) { - CHECK(src.IsWRegister()) << src; - ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister())); - } else if (dst.IsSRegister()) { - CHECK(src.IsSRegister()) << src; - ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister())); - } else { - CHECK(dst.IsDRegister()) << dst; - CHECK(src.IsDRegister()) << src; - ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister())); - } - } -} - -void Arm64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs, - ThreadOffset64 tr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); -} - -void Arm64Assembler::CopyRawPtrToThread(ThreadOffset64 tr_offs, - FrameOffset fr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); -} - -void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), - SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), - SP, dest.Int32Value()); -} - -void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = src_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 
4 and 8"; - } -} - -void Arm64Assembler::Copy(ManagedRegister m_dest_base, Offset dest_offs, FrameOffset src, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = m_dest_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(), - dest_offs.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); - StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64Assembler::Copy(ManagedRegister m_dest, Offset dest_offset, - ManagedRegister m_src, Offset src_offset, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister src = m_src.AsArm64(); - Arm64ManagedRegister dest = m_dest.AsArm64(); - CHECK(dest.IsXRegister()) << dest; - CHECK(src.IsXRegister()) << src; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - if (scratch.IsWRegister()) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } else { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, - FrameOffset /*src*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) { - // TODO: Should we check that m_scratch is IP? - see arm. 
- ___ Dmb(InnerShareable, BarrierAll); -} - -void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) { - Arm64ManagedRegister reg = mreg.AsArm64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsWRegister()) << reg; - if (size == 1) { - ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } else { - ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } -} - -void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) { - Arm64ManagedRegister reg = mreg.AsArm64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsWRegister()) << reg; - if (size == 1) { - ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } else { - ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } -} - -void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void Arm64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void Arm64Assembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { - Arm64ManagedRegister base = m_base.AsArm64(); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value()); - ___ Blr(reg_x(scratch.AsXRegister())); -} - void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { Arm64ManagedRegister base = m_base.AsArm64(); Arm64ManagedRegister scratch = m_scratch.AsArm64(); @@ -525,114 +71,6 @@ void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister ___ Br(reg_x(scratch.AsXRegister())); } -void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - // Call *(*(SP + base) + offset) - LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value()); - LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value()); - ___ Blr(reg_x(scratch.AsXRegister())); -} - -void Arm64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant"; -} - -void Arm64Assembler::CreateHandleScopeEntry( - ManagedRegister m_out_reg, FrameOffset handle_scope_offs, ManagedRegister m_in_reg, - bool null_allowed) { - Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); - Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); - // For now we only hold stale handle scope entries in x registers. - CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg; - CHECK(out_reg.IsXRegister()) << out_reg; - if (null_allowed) { - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) - if (in_reg.IsNoRegister()) { - LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP, - handle_scope_offs.Int32Value()); - in_reg = out_reg; - } - ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0); - if (!out_reg.Equals(in_reg)) { - LoadImmediate(out_reg.AsXRegister(), 0, eq); - } - AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne); - } else { - AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al); - } -} - -void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset, - ManagedRegister m_scratch, bool null_allowed) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - if (null_allowed) { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, - handle_scope_offset.Int32Value()); - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0); - // Move this logic in add constants with flags. - AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne); - } else { - AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al); - } - StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value()); -} - -void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg, - ManagedRegister m_in_reg) { - Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); - Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); - CHECK(out_reg.IsXRegister()) << out_reg; - CHECK(in_reg.IsXRegister()) << in_reg; - vixl::aarch64::Label exit; - if (!out_reg.Equals(in_reg)) { - // FIXME: Who sets the flags here? - LoadImmediate(out_reg.AsXRegister(), 0, eq); - } - ___ Cbz(reg_x(in_reg.AsXRegister()), &exit); - LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0); - ___ Bind(&exit); -} - -void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { - CHECK_ALIGNED(stack_adjust, kStackAlignment); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust)); - LoadFromOffset(scratch.AsXRegister(), - TR, - Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); - ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry()); -} - -void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { - UseScratchRegisterScope temps(&vixl_masm_); - temps.Exclude(reg_x(exception->scratch_.AsXRegister())); - Register temp = temps.AcquireX(); - - // Bind exception poll entry. - ___ Bind(exception->Entry()); - if (exception->stack_adjust_ != 0) { // Fix up the frame. - DecreaseFrameSize(exception->stack_adjust_); - } - // Pass exception object as argument. - // Don't care about preserving X0 as this won't return. - ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister())); - ___ Ldr(temp, - MEM_OP(reg_x(TR), - QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value())); - - ___ Blr(temp); - // Call should never return. 
- ___ Brk(); -} - static inline dwarf::Reg DWARFReg(CPURegister reg) { if (reg.IsFPRegister()) { return dwarf::Reg::Arm64Fp(reg.GetCode()); @@ -696,105 +134,6 @@ void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) { DCHECK(registers.IsEmpty()); } -void Arm64Assembler::BuildFrame(size_t frame_size, - ManagedRegister method_reg, - ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) { - // Setup VIXL CPURegList for callee-saves. - CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); - CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); - for (auto r : callee_save_regs) { - Arm64ManagedRegister reg = r.AsArm64(); - if (reg.IsXRegister()) { - core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); - } else { - DCHECK(reg.IsDRegister()); - fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); - } - } - size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); - size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); - - // Increase frame to required size. - DCHECK_ALIGNED(frame_size, kStackAlignment); - DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); - IncreaseFrameSize(frame_size); - - // Save callee-saves. - SpillRegisters(core_reg_list, frame_size - core_reg_size); - SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - - DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); - - // Write ArtMethod* - DCHECK(X0 == method_reg.AsArm64().AsXRegister()); - StoreToOffset(X0, SP, 0); - - // Write out entry spills - int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize); - for (size_t i = 0; i < entry_spills.size(); ++i) { - Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); - if (reg.IsNoRegister()) { - // only increment stack offset. - ManagedRegisterSpill spill = entry_spills.at(i); - offset += spill.getSize(); - } else if (reg.IsXRegister()) { - StoreToOffset(reg.AsXRegister(), SP, offset); - offset += 8; - } else if (reg.IsWRegister()) { - StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset); - offset += 4; - } else if (reg.IsDRegister()) { - StoreDToOffset(reg.AsDRegister(), SP, offset); - offset += 8; - } else if (reg.IsSRegister()) { - StoreSToOffset(reg.AsSRegister(), SP, offset); - offset += 4; - } - } -} - -void Arm64Assembler::RemoveFrame(size_t frame_size, - ArrayRef<const ManagedRegister> callee_save_regs) { - // Setup VIXL CPURegList for callee-saves. - CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); - CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); - for (auto r : callee_save_regs) { - Arm64ManagedRegister reg = r.AsArm64(); - if (reg.IsXRegister()) { - core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); - } else { - DCHECK(reg.IsDRegister()); - fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); - } - } - size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); - size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); - - // For now we only check that the size of the frame is large enough to hold spills and method - // reference. - DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); - DCHECK_ALIGNED(frame_size, kStackAlignment); - - DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); - - cfi_.RememberState(); - - // Restore callee-saves. 
- UnspillRegisters(core_reg_list, frame_size - core_reg_size); - UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - - // Decrease frame size to start of callee saved regs. - DecreaseFrameSize(frame_size); - - // Pop callee saved and return to LR. - ___ Ret(); - - // The CFI should be restored for any code that follows the exit block. - cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - void Arm64Assembler::PoisonHeapReference(Register reg) { DCHECK(reg.IsW()); // reg = -reg. diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index d7084dad1c..4e88e640e5 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -22,11 +22,9 @@ #include <vector> #include "base/arena_containers.h" -#include "base/enums.h" #include "base/logging.h" #include "utils/arm64/managed_register_arm64.h" #include "utils/assembler.h" -#include "utils/jni_macro_assembler.h" #include "offsets.h" // TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn @@ -63,38 +61,14 @@ enum StoreOperandType { kStoreDWord }; -class Arm64Exception { - private: - Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) { - } - - vixl::aarch64::Label* Entry() { return &exception_entry_; } - - // Register used for passing Thread::Current()->exception_ . - const Arm64ManagedRegister scratch_; - - // Stack adjust for ExceptionPool. - const size_t stack_adjust_; - - vixl::aarch64::Label exception_entry_; - - friend class Arm64Assembler; - DISALLOW_COPY_AND_ASSIGN(Arm64Exception); -}; - -class Arm64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> { +class Arm64Assembler FINAL : public Assembler { public: - explicit Arm64Assembler(ArenaAllocator* arena) - : Assembler(arena), - exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {} + explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena) {} virtual ~Arm64Assembler() {} vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } - DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } - // Finalize the code. void FinalizeCode() OVERRIDE; @@ -105,110 +79,14 @@ class Arm64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerS // Copy instructions out of assembly buffer into the given region of memory. void FinalizeInstructions(const MemoryRegion& region); + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); + void SpillRegisters(vixl::aarch64::CPURegList registers, int offset); void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset); - // Emit code that will create an activation on the stack. - void BuildFrame(size_t frame_size, - ManagedRegister method_reg, - ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack. - void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines. 
- void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - void StoreStackOffsetToThread(ThreadOffset64 thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines. - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; - - // Copying routines. - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; - void CopyRawPtrFromThread(FrameOffset fr_offs, - ThreadOffset64 thr_offs, - ManagedRegister scratch) OVERRIDE; - void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - - // Sign extension. - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, - FrameOffset handlescope_offset, - ManagedRegister in_reg, - bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handlescope_offset, - ManagedRegister scratch, - bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst. 
- void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; - // Jump to address (not setting link register) void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch); - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - // // Heap poisoning. // @@ -227,7 +105,6 @@ class Arm64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerS UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64"; } - private: static vixl::aarch64::Register reg_x(int code) { CHECK(code < kNumberOfXRegisters) << code; if (code == SP) { @@ -256,37 +133,7 @@ class Arm64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerS return vixl::aarch64::FPRegister::GetSRegFromCode(code); } - // Emits Exception block. - void EmitExceptionPoll(Arm64Exception *exception); - - void StoreWToOffset(StoreOperandType type, WRegister source, - XRegister base, int32_t offset); - void StoreToOffset(XRegister source, XRegister base, int32_t offset); - void StoreSToOffset(SRegister source, XRegister base, int32_t offset); - void StoreDToOffset(DRegister source, XRegister base, int32_t offset); - - void LoadImmediate(XRegister dest, - int32_t value, - vixl::aarch64::Condition cond = vixl::aarch64::al); - void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size); - void LoadWFromOffset(LoadOperandType type, - WRegister dest, - XRegister base, - int32_t offset); - void LoadFromOffset(XRegister dest, XRegister base, int32_t offset); - void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset); - void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset); - void AddConstant(XRegister rd, - int32_t value, - vixl::aarch64::Condition cond = vixl::aarch64::al); - void AddConstant(XRegister rd, - XRegister rn, - int32_t value, - vixl::aarch64::Condition cond = vixl::aarch64::al); - - // List of exception blocks to generate at the end of the code cache. - ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; - + private: // VIXL assembler. vixl::aarch64::MacroAssembler vixl_masm_; diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc new file mode 100644 index 0000000000..dfdcd11893 --- /dev/null +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -0,0 +1,754 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni_macro_assembler_arm64.h" + +#include "base/logging.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "managed_register_arm64.h" +#include "offsets.h" +#include "thread.h" + +using namespace vixl::aarch64; // NOLINT(build/namespaces) + +namespace art { +namespace arm64 { + +#ifdef ___ +#error "ARM64 Assembler macro already defined." +#else +#define ___ asm_.GetVIXLAssembler()-> +#endif + +#define reg_x(X) Arm64Assembler::reg_x(X) +#define reg_w(W) Arm64Assembler::reg_w(W) +#define reg_d(D) Arm64Assembler::reg_d(D) +#define reg_s(S) Arm64Assembler::reg_s(S) + +Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() { +} + +void Arm64JNIMacroAssembler::FinalizeCode() { + for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) { + EmitExceptionPoll(exception.get()); + } + ___ FinalizeCode(); +} + +void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR)); +} + +void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) { + StoreToOffset(TR, SP, offset.Int32Value()); +} + +// See Arm64 PCS Section 5.2.2.1. +void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); +} + +// See Arm64 PCS Section 5.2.2.1. +void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant(SP, adjust); + cfi().AdjustCFAOffset(-adjust); +} + +void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) { + AddConstant(rd, rd, value, cond); +} + +void Arm64JNIMacroAssembler::AddConstant(XRegister rd, + XRegister rn, + int32_t value, + Condition cond) { + if ((cond == al) || (cond == nv)) { + // VIXL macro-assembler handles all variants. + ___ Add(reg_x(rd), reg_x(rn), value); + } else { + // temp = rd + value + // rd = cond ? 
temp : rn + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(rd), reg_x(rn)); + Register temp = temps.AcquireX(); + ___ Add(temp, reg_x(rn), value); + ___ Csel(reg_x(rd), temp, reg_x(rd), cond); + } +} + +void Arm64JNIMacroAssembler::StoreWToOffset(StoreOperandType type, + WRegister source, + XRegister base, + int32_t offset) { + switch (type) { + case kStoreByte: + ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + case kStoreHalfword: + ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + case kStoreWord: + ___ Str(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +void Arm64JNIMacroAssembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) { + CHECK_NE(source, SP); + ___ Str(reg_x(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) { + ___ Str(reg_s(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) { + ___ Str(reg_d(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { + Arm64ManagedRegister src = m_src.AsArm64(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsWRegister()) { + CHECK_EQ(4u, size); + StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value()); + } else if (src.IsXRegister()) { + CHECK_EQ(8u, size); + StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); + } else if (src.IsSRegister()) { + StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value()); + } else { + CHECK(src.IsDRegister()) << src; + StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value()); + } +} + +void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) { + Arm64ManagedRegister src = m_src.AsArm64(); + CHECK(src.IsXRegister()) << src; + StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP, + offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { + Arm64ManagedRegister src = m_src.AsArm64(); + CHECK(src.IsXRegister()) << src; + StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, + uint32_t imm, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadImmediate(scratch.AsXRegister(), imm); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, + offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, + FrameOffset fr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + Register temp = temps.AcquireX(); + ___ Mov(temp, reg_x(SP)); + ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); +} + +void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off, + ManagedRegister m_source, + FrameOffset in_off, + ManagedRegister m_scratch) { + Arm64ManagedRegister source = m_source.AsArm64(); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + 
StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value()); + LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8); +} + +// Load routines. +void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) { + if ((cond == al) || (cond == nv)) { + ___ Mov(reg_x(dest), value); + } else { + // temp = value + // rd = cond ? temp : rd + if (value != 0) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(dest)); + Register temp = temps.AcquireX(); + ___ Mov(temp, value); + ___ Csel(reg_x(dest), temp, reg_x(dest), cond); + } else { + ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond); + } + } +} + +void Arm64JNIMacroAssembler::LoadWFromOffset(LoadOperandType type, + WRegister dest, + XRegister base, + int32_t offset) { + switch (type) { + case kLoadSignedByte: + ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadSignedHalfword: + ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadUnsignedByte: + ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadUnsignedHalfword: + ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadWord: + ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +// Note: We can extend this member by adding load type info - see +// sign extended A64 load variants. +void Arm64JNIMacroAssembler::LoadFromOffset(XRegister dest, XRegister base, int32_t offset) { + CHECK_NE(dest, SP); + ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::LoadSFromOffset(SRegister dest, XRegister base, int32_t offset) { + ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::LoadDFromOffset(DRegister dest, XRegister base, int32_t offset) { + ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::Load(Arm64ManagedRegister dest, + XRegister base, + int32_t offset, + size_t size) { + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size) << dest; + } else if (dest.IsWRegister()) { + CHECK_EQ(4u, size) << dest; + ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset)); + } else if (dest.IsXRegister()) { + CHECK_NE(dest.AsXRegister(), SP) << dest; + if (size == 4u) { + ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset)); + } else { + CHECK_EQ(8u, size) << dest; + ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset)); + } + } else if (dest.IsSRegister()) { + ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset)); + } else { + CHECK(dest.IsDRegister()) << dest; + ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset)); + } +} + +void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { + return Load(m_dst.AsArm64(), SP, src.Int32Value(), size); +} + +void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, + ThreadOffset64 src, + size_t size) { + return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); +} + +void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + CHECK(dst.IsXRegister()) << dst; + LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, + ManagedRegister m_base, + MemberOffset offs, + bool unpoison_reference) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + 
Arm64ManagedRegister base = m_base.AsArm64(); + CHECK(dst.IsXRegister() && base.IsXRegister()); + LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(), + offs.Int32Value()); + if (unpoison_reference) { + WRegister ref_reg = dst.AsOverlappingWRegister(); + asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg)); + } +} + +void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst, + ManagedRegister m_base, + Offset offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + Arm64ManagedRegister base = m_base.AsArm64(); + CHECK(dst.IsXRegister() && base.IsXRegister()); + // Remove dst and base form the temp list - higher level API uses IP1, IP0. + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister())); + ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); +} + +void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + CHECK(dst.IsXRegister()) << dst; + LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value()); +} + +// Copying routines. +void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + Arm64ManagedRegister src = m_src.AsArm64(); + if (!dst.Equals(src)) { + if (dst.IsXRegister()) { + if (size == 4) { + CHECK(src.IsWRegister()); + ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister())); + } else { + if (src.IsXRegister()) { + ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister())); + } else { + ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister())); + } + } + } else if (dst.IsWRegister()) { + CHECK(src.IsWRegister()) << src; + ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister())); + } else if (dst.IsSRegister()) { + CHECK(src.IsSRegister()) << src; + ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister())); + } else { + CHECK(dst.IsDRegister()) << dst; + CHECK(src.IsDRegister()) << src; + ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister())); + } + } +} + +void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 tr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs, + FrameOffset fr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), + SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), + SP, dest.Int32Value()); +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, 
scratch.AsOverlappingWRegister(), SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister base = src_base.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base, + Offset dest_offs, + FrameOffset src, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister base = m_dest_base.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(), + dest_offs.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); + StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, + FrameOffset /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*mscratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; +} + +void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest, + Offset dest_offset, + ManagedRegister m_src, + Offset src_offset, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister src = m_src.AsArm64(); + Arm64ManagedRegister dest = m_dest.AsArm64(); + CHECK(dest.IsXRegister()) << dest; + CHECK(src.IsXRegister()) << src; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + if (scratch.IsWRegister()) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(), + dest_offset.Int32Value()); + } else { + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(), + dest_offset.Int32Value()); + } + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value()); + StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only 
support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, + Offset /*dest_offset*/, + FrameOffset /*src*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; +} + +void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) { + // TODO: Should we check that m_scratch is IP? - see arm. + ___ Dmb(InnerShareable, BarrierAll); +} + +void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) { + Arm64ManagedRegister reg = mreg.AsArm64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsWRegister()) << reg; + if (size == 1) { + ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } else { + ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } +} + +void Arm64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) { + Arm64ManagedRegister reg = mreg.AsArm64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsWRegister()) << reg; + if (size == 1) { + ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } else { + ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } +} + +void Arm64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void Arm64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { + Arm64ManagedRegister base = m_base.AsArm64(); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + ___ Blr(reg_x(scratch.AsXRegister())); +} + +void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + // Call *(*(SP + base) + offset) + LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value()); + LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value()); + ___ Blr(reg_x(scratch.AsXRegister())); +} + +void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant"; +} + +void Arm64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister m_out_reg, + FrameOffset handle_scope_offs, + ManagedRegister m_in_reg, + bool null_allowed) { + Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); + Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); + // For now we only hold stale handle scope entries in x registers. + CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg; + CHECK(out_reg.IsXRegister()) << out_reg; + if (null_allowed) { + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP, + handle_scope_offs.Int32Value()); + in_reg = out_reg; + } + ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0); + if (!out_reg.Equals(in_reg)) { + LoadImmediate(out_reg.AsXRegister(), 0, eq); + } + AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne); + } else { + AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al); + } +} + +void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister m_scratch, + bool null_allowed) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + if (null_allowed) { + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, + handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0); + // Move this logic in add constants with flags. + AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne); + } else { + AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al); + } + StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value()); +} + +void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg, + ManagedRegister m_in_reg) { + Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); + Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); + CHECK(out_reg.IsXRegister()) << out_reg; + CHECK(in_reg.IsXRegister()) << in_reg; + vixl::aarch64::Label exit; + if (!out_reg.Equals(in_reg)) { + // FIXME: Who sets the flags here? + LoadImmediate(out_reg.AsXRegister(), 0, eq); + } + ___ Cbz(reg_x(in_reg.AsXRegister()), &exit); + LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0); + ___ Bind(&exit); +} + +void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { + CHECK_ALIGNED(stack_adjust, kStackAlignment); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust)); + LoadFromOffset(scratch.AsXRegister(), + TR, + Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); + ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry()); +} + +void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(exception->scratch_.AsXRegister())); + Register temp = temps.AcquireX(); + + // Bind exception poll entry. + ___ Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); + } + // Pass exception object as argument. + // Don't care about preserving X0 as this won't return. + ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister())); + ___ Ldr(temp, + MEM_OP(reg_x(TR), + QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value())); + + ___ Blr(temp); + // Call should never return. + ___ Brk(); +} + +void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + // Setup VIXL CPURegList for callee-saves. 
+ CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); + } + } + size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); + + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); + IncreaseFrameSize(frame_size); + + // Save callee-saves. + asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size); + asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); + + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); + + // Write ArtMethod* + DCHECK(X0 == method_reg.AsArm64().AsXRegister()); + StoreToOffset(X0, SP, 0); + + // Write out entry spills + int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize); + for (size_t i = 0; i < entry_spills.size(); ++i) { + Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); + if (reg.IsNoRegister()) { + // only increment stack offset. + ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsXRegister()) { + StoreToOffset(reg.AsXRegister(), SP, offset); + offset += 8; + } else if (reg.IsWRegister()) { + StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset); + offset += 4; + } else if (reg.IsDRegister()) { + StoreDToOffset(reg.AsDRegister(), SP, offset); + offset += 8; + } else if (reg.IsSRegister()) { + StoreSToOffset(reg.AsSRegister(), SP, offset); + offset += 4; + } + } +} + +void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) { + // Setup VIXL CPURegList for callee-saves. + CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); + } + } + size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); + + // For now we only check that the size of the frame is large enough to hold spills and method + // reference. + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); + DCHECK_ALIGNED(frame_size, kStackAlignment); + + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); + + cfi().RememberState(); + + // Restore callee-saves. + asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size); + asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); + + // Decrease frame size to start of callee saved regs. + DecreaseFrameSize(frame_size); + + // Pop callee saved and return to LR. + ___ Ret(); + + // The CFI should be restored for any code that follows the exit block. 
+ cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +#undef ___ + +} // namespace arm64 +} // namespace art diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h new file mode 100644 index 0000000000..79ee441144 --- /dev/null +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ +#define ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ + +#include <stdint.h> +#include <memory> +#include <vector> + +#include "assembler_arm64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/logging.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" +#include "offsets.h" + +// TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wmissing-noreturn" +#include "a64/macro-assembler-a64.h" +#pragma GCC diagnostic pop + +namespace art { +namespace arm64 { + +class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> { + public: + explicit Arm64JNIMacroAssembler(ArenaAllocator* arena) + : JNIMacroAssemblerFwd(arena), + exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {} + + ~Arm64JNIMacroAssembler(); + + // Finalize the code. + void FinalizeCode() OVERRIDE; + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack. + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + void StoreSpanning(FrameOffset dest, + ManagedRegister src, + FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines. 
+ void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; + + // Copying routines. + void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, + ManagedRegister scratch) OVERRIDE; + void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current(). + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister scratch, + bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst. + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset]. 
+ void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + private: + class Arm64Exception { + public: + Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + vixl::aarch64::Label* Entry() { return &exception_entry_; } + + // Register used for passing Thread::Current()->exception_ . + const Arm64ManagedRegister scratch_; + + // Stack adjust for ExceptionPool. + const size_t stack_adjust_; + + vixl::aarch64::Label exception_entry_; + + private: + DISALLOW_COPY_AND_ASSIGN(Arm64Exception); + }; + + // Emits Exception block. + void EmitExceptionPoll(Arm64Exception *exception); + + void StoreWToOffset(StoreOperandType type, + WRegister source, + XRegister base, + int32_t offset); + void StoreToOffset(XRegister source, XRegister base, int32_t offset); + void StoreSToOffset(SRegister source, XRegister base, int32_t offset); + void StoreDToOffset(DRegister source, XRegister base, int32_t offset); + + void LoadImmediate(XRegister dest, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size); + void LoadWFromOffset(LoadOperandType type, + WRegister dest, + XRegister base, + int32_t offset); + void LoadFromOffset(XRegister dest, XRegister base, int32_t offset); + void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset); + void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset); + void AddConstant(XRegister rd, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + void AddConstant(XRegister rd, + XRegister rn, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + + // List of exception blocks to generate at the end of the code cache. 
+ ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; +}; + +} // namespace arm64 +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 1acc90ca6f..797a98cfd5 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -20,11 +20,10 @@ #include <vector> #ifdef ART_ENABLE_CODEGEN_arm -#include "arm/assembler_arm32.h" -#include "arm/assembler_thumb2.h" +#include "arm/jni_macro_assembler_arm.h" #endif #ifdef ART_ENABLE_CODEGEN_arm64 -#include "arm64/assembler_arm64.h" +#include "arm64/jni_macro_assembler_arm64.h" #endif #ifdef ART_ENABLE_CODEGEN_mips #include "mips/assembler_mips.h" @@ -58,9 +57,8 @@ MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create( switch (instruction_set) { #ifdef ART_ENABLE_CODEGEN_arm case kArm: - return MacroAsm32UniquePtr(new (arena) arm::Arm32Assembler(arena)); case kThumb2: - return MacroAsm32UniquePtr(new (arena) arm::Thumb2Assembler(arena)); + return MacroAsm32UniquePtr(new (arena) arm::ArmJNIMacroAssembler(arena, instruction_set)); #endif #ifdef ART_ENABLE_CODEGEN_mips case kMips: @@ -90,7 +88,7 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( switch (instruction_set) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: - return MacroAsm64UniquePtr(new (arena) arm64::Arm64Assembler(arena)); + return MacroAsm64UniquePtr(new (arena) arm64::Arm64JNIMacroAssembler(arena)); #endif #ifdef ART_ENABLE_CODEGEN_mips64 case kMips64: diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h index 2dfb65c479..0bc1560ed7 100644 --- a/compiler/utils/x86/constants_x86.h +++ b/compiler/utils/x86/constants_x86.h @@ -97,6 +97,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven }; diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 37db6b1543..cc508a196b 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -106,6 +106,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven };