diff options
-rw-r--r-- | compiler/Android.mk | 3 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips64.cc | 174 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips64.h | 24 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 18 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_cfi_test.cc | 29 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_cfi_test_expected.inc | 54 | ||||
-rw-r--r-- | compiler/utils/assembler_test.h | 5 | ||||
-rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 1007 | ||||
-rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 387 | ||||
-rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 286 |
10 files changed, 1590 insertions, 397 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 42ddfd83ab..b05f479da4 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -219,7 +219,8 @@ LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \ utils/mips/assembler_mips.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \ - $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) + $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \ + utils/mips64/assembler_mips64.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 := LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \ diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 7b33075358..8530fe7a36 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -27,8 +27,8 @@ #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. - Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -380,7 +380,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -441,6 +441,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +629,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -939,7 +966,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1048,6 +1075,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1079,7 +1107,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1669,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. - if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -2264,7 +2287,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. We don't need to perform // any check, so simply fall through. @@ -2316,7 +2339,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2341,8 +2364,8 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + Mips64Label* true_target, + Mips64Label* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -2352,12 +2375,12 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // Constant condition, statically compared against 1. if (cond->AsIntConstant()->IsOne()) { if (true_target != nullptr) { - __ B(true_target); + __ Bc(true_target); } } else { DCHECK(cond->AsIntConstant()->IsZero()); if (false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } return; @@ -2397,7 +2420,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } IfCondition if_cond; - Label* non_fallthrough_target; + Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); non_fallthrough_target = false_target; @@ -2435,7 +2458,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(non_fallthrough_target); // always true + __ Bc(non_fallthrough_target); // always true break; } } else { @@ -2443,60 +2466,37 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = TMP; __ LoadConst32(rhs_reg, rhs_imm); } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. - if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(non_fallthrough_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); + break; } } } @@ -2504,7 +2504,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // If neither branch falls through (case 3), the conditional branch to `true_target` // was already emitted (case 2) and we need to emit a jump to `false_target`. if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } @@ -2518,9 +2518,9 @@ void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? nullptr : codegen_->GetLabelOf(true_successor); - Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } @@ -2695,7 +2695,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. @@ -2790,6 +2790,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2924,13 +2925,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2947,6 +2949,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2988,6 +2991,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -3929,7 +3933,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); for (int32_t i = 0; i < num_entries; i++) { int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); + Mips64Label* succ = codegen_->GetLabelOf(successors[i]); if (case_value == 0) { __ Beqzc(value_reg, succ); } else { @@ -3940,7 +3944,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins // And the default for any other value. if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index a078dd1819..85e3a4a3ce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -158,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -231,8 +231,8 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + Mips64Label* true_target, + Mips64Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -265,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -298,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -349,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. - Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. + Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 3654159f83..ecee11dea6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -115,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -806,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1256,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); @@ -1418,10 +1418,10 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); - Label loop; - Label end; - Label return_true; - Label return_false; + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; // Get offsets of count, value, and class fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1485,7 +1485,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadConst64(out, 1); - __ B(&end); + __ Bc(&end); // Return false and exit the function. __ Bind(&return_false); @@ -1514,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 34f1fe5949..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -25,6 +25,7 @@ #include "utils/assembler.h" #include "utils/arm/assembler_thumb2.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -212,6 +213,34 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. + for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} + #endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 4571ebf2d4..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -413,3 +413,57 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x0002007c: nop // 0x00020080: .cfi_restore_state // 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... +// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index f1233ca457..9457da1c36 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -840,12 +840,17 @@ class AssemblerTest : public testing::Test { return str; } + // Override this to pad the code with NOPs to a certain size if needed. + virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + void DriverWrapper(std::string assembly_text, std::string test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); assembler_->FinalizeInstructions(code); + Pad(*data); test_helper_->Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..107d5bb572 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,65 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -925,15 +1031,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1040,621 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } } -// -// MIPS64R6 branches -// -// -// Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. -// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} + +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} + +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. + condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} + +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. + // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). + offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1797,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. IncreaseFrameSize(frame_size); @@ -1298,6 +1840,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1859,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1868,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +1925,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? - LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +1934,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +1951,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +1987,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2032,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2040,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2101,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2127,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2150,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2177,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is @@ -1653,7 +2199,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2209,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2227,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2240,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2254,39 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..57fc19a6e9 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. +class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -240,32 +289,34 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? - void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +328,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. // - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +372,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +405,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +427,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +435,253 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). + uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. + uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). + OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. + int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. + // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. + std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..29a5a88316 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -348,7 +432,203 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// |