author     2025-03-11 02:36:54 -0700
committer  2025-03-17 03:22:31 -0700
commit     49189d0836f45cc2115ca57c0cf2a471706ed231 (patch)
tree       342c983000ed3f833a2eaff7e9941b3d50228c2b /compiler/optimizing/fast_compiler_arm64.cc
parent     38a1949da3b69443cf1d88b3dedaccc7c5fe2c90 (diff)
Reland "Implement if instructions in fast baseline compiler."
This reverts commit ee8ab3125768bbf60347c507103a910553acd158.
Reason for revert: Fix logic around branching.
Change-Id: Ifadcb75488fe25dd612451c5813ea797b9da3b5c
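
Editor's note: the "logic around branching" fixed by this reland boils down to two invariants at every branch target: dex registers holding constants are materialized into physical registers before branching (MoveConstantsToRegisters), and the object / non-null masks recorded for the target are the AND of the masks on every incoming edge (UpdateMasks), starting from an all-ones seed. The following is a minimal, standalone sketch of that merge rule only; BranchTargetState and MergeEdge are illustrative names, not ART APIs.

// Standalone sketch (illustrative, not the ART implementation): merging
// per-branch-target bit masks by AND-ing the state of every incoming edge.
// The masks start as all-ones, mirroring the memset(..., ~0, ...) calls in
// the FastCompilerARM64 constructor, so the first edge simply records itself.
#include <cassert>
#include <cstdint>
#include <vector>

struct BranchTargetState {
  // One entry per dex pc; only entries that are branch targets are consulted.
  std::vector<uint64_t> object_register_masks;
  std::vector<uint64_t> is_non_null_masks;

  explicit BranchTargetState(size_t code_units)
      : object_register_masks(code_units, ~uint64_t{0}),
        is_non_null_masks(code_units, ~uint64_t{0}) {}

  // Called for every edge (explicit branch or fall-through) into `target_dex_pc`.
  void MergeEdge(uint32_t target_dex_pc,
                 uint64_t object_register_mask,
                 uint64_t is_non_null_mask) {
    // A register is treated as holding an object (or a proven non-null object)
    // at the target only if that holds on *all* incoming edges.
    object_register_masks[target_dex_pc] &= object_register_mask;
    is_non_null_masks[target_dex_pc] &= is_non_null_mask;
  }
};

int main() {
  BranchTargetState state(/*code_units=*/16);
  // Edge 1: v0 and v2 hold objects, v0 is known non-null.
  state.MergeEdge(/*target_dex_pc=*/8, /*object_register_mask=*/0b101, /*is_non_null_mask=*/0b001);
  // Edge 2 (fall-through): only v0 holds an object, nothing proven non-null.
  state.MergeEdge(/*target_dex_pc=*/8, /*object_register_mask=*/0b001, /*is_non_null_mask=*/0b000);
  assert(state.object_register_masks[8] == 0b001);
  assert(state.is_non_null_masks[8] == 0b000);
  return 0;
}

When the compiler later reaches the target pc, it adopts the merged masks as its current state, which is what the ProcessInstructions hunk below does with is_non_null_masks_[pair.DexPc()] and object_register_masks_[pair.DexPc()].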
Diffstat (limited to 'compiler/optimizing/fast_compiler_arm64.cc')
-rw-r--r--  compiler/optimizing/fast_compiler_arm64.cc  240
1 files changed, 230 insertions, 10 deletions
diff --git a/compiler/optimizing/fast_compiler_arm64.cc b/compiler/optimizing/fast_compiler_arm64.cc
index 4ce4f45bee..41b0a156d1 100644
--- a/compiler/optimizing/fast_compiler_arm64.cc
+++ b/compiler/optimizing/fast_compiler_arm64.cc
@@ -33,6 +33,7 @@
 #pragma GCC diagnostic ignored "-Wshadow"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
 #pragma GCC diagnostic pop
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
@@ -119,11 +120,19 @@ class FastCompilerARM64 : public FastCompiler {
         code_generation_data_(CodeGenerationData::Create(arena_stack, InstructionSet::kArm64)),
         vreg_locations_(dex_compilation_unit.GetCodeItemAccessor().RegistersSize(),
                         allocator->Adapter()),
+        branch_targets_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+                        allocator->Adapter()),
+        object_register_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+                               allocator->Adapter()),
+        is_non_null_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+                           allocator->Adapter()),
         has_frame_(false),
         core_spill_mask_(0u),
         fpu_spill_mask_(0u),
         object_register_mask_(0u),
         is_non_null_mask_(0u) {
+    memset(is_non_null_masks_.data(), ~0, is_non_null_masks_.size() * sizeof(uint64_t));
+    memset(object_register_masks_.data(), ~0, object_register_masks_.size() * sizeof(uint64_t));
     GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
   }
 
@@ -200,6 +209,14 @@ class FastCompilerARM64 : public FastCompiler {
   // Return the existing register location for `reg`.
   Location GetExistingRegisterLocation(uint32_t reg, DataType::Type type);
 
+  // Move dex registers holding constants into physical registers. Used when
+  // branching.
+  void MoveConstantsToRegisters();
+
+  // Update the masks associated to the given dex_pc. Used when dex_pc is a
+  // branch target.
+  void UpdateMasks(uint32_t dex_pc);
+
   // Generate code for one instruction.
   bool ProcessDexInstruction(const Instruction& instruction,
                              uint32_t dex_pc,
@@ -214,6 +231,10 @@ class FastCompilerARM64 : public FastCompiler {
   // Generate code for doing a Java invoke.
   bool HandleInvoke(const Instruction& instruction, uint32_t dex_pc, InvokeType invoke_type);
 
+  // Generate code for IF_* instructions.
+  template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+  bool If_21_22t(const Instruction& instruction, uint32_t dex_pc);
+
   // Generate code for doing a runtime invoke.
   void InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc);
 
@@ -257,6 +278,21 @@ class FastCompilerARM64 : public FastCompiler {
     return !(is_non_null_mask_ & (1 << vreg_index));
   }
 
+  // Get the label associated with the given `dex_pc`.
+  vixl::aarch64::Label* GetLabelOf(uint32_t dex_pc) {
+    return &branch_targets_[dex_pc];
+  }
+
+  // If we need to abort compilation, clear branch targets, required by vixl.
+  void AbortCompilation() {
+    for (vixl::aarch64::Label& label : branch_targets_) {
+      if (label.IsLinked()) {
+        __ Bind(&label);
+      }
+    }
+  }
+
   // Compiler utilities.
   //
   Arm64Assembler* GetAssembler() { return &assembler_; }
@@ -302,6 +338,17 @@ class FastCompilerARM64 : public FastCompiler {
   // The current location of each dex register.
   ArenaVector<Location> vreg_locations_;
 
+  // A vector of size code units for dex pcs that are branch targets.
+  ArenaVector<vixl::aarch64::Label> branch_targets_;
+
+  // For dex pcs that are branch targets, the register mask that will be used at
+  // the point of that pc.
+  ArenaVector<uint64_t> object_register_masks_;
+
+  // For dex pcs that are branch targets, the mask for non-null objects that will
+  // be used at the point of that pc.
+  ArenaVector<uint64_t> is_non_null_masks_;
+
   // Whether we've created a frame for this compiled method.
   bool has_frame_;
 
@@ -374,6 +421,22 @@ bool FastCompilerARM64::InitializeParameters() {
   return true;
 }
 
+void FastCompilerARM64::MoveConstantsToRegisters() {
+  for (uint32_t i = 0; i < vreg_locations_.size(); ++i) {
+    Location location = vreg_locations_[i];
+    if (location.IsConstant()) {
+      vreg_locations_[i] =
+          CreateNewRegisterLocation(i, DataType::Type::kInt32, /* next= */ nullptr);
+      MoveLocation(vreg_locations_[i], location, DataType::Type::kInt32);
+    }
+  }
+}
+
+void FastCompilerARM64::UpdateMasks(uint32_t dex_pc) {
+  object_register_masks_[dex_pc] &= object_register_mask_;
+  is_non_null_masks_[dex_pc] &= is_non_null_mask_;
+}
+
 bool FastCompilerARM64::ProcessInstructions() {
   DCHECK(GetCodeItemAccessor().HasCodeItem());
 
@@ -390,6 +453,21 @@ bool FastCompilerARM64::ProcessInstructions() {
     if (it != end) {
      const DexInstructionPcPair& next_pair = *it;
      next = &next_pair.Inst();
+      if (GetLabelOf(next_pair.DexPc())->IsLinked()) {
+        // Disable the micro-optimization, as the next instruction is a branch
+        // target.
+        next = nullptr;
+      }
+    }
+    vixl::aarch64::Label* label = GetLabelOf(pair.DexPc());
+    if (label->IsLinked()) {
+      // Emulate a branch to this pc.
+      MoveConstantsToRegisters();
+      UpdateMasks(pair.DexPc());
+      // Set new masks based on all incoming edges.
+      is_non_null_mask_ = is_non_null_masks_[pair.DexPc()];
+      object_register_mask_ = object_register_masks_[pair.DexPc()];
+      __ Bind(label);
    }
 
    if (!ProcessDexInstruction(pair.Inst(), pair.DexPc(), next)) {
@@ -776,6 +854,11 @@ bool FastCompilerARM64::HandleInvoke(const Instruction& instruction,
    } else if (invoke_type == kInterface) {
      offset = resolved_method->GetImtIndex();
    }
+
+    if (resolved_method->IsStringConstructor()) {
+      unimplemented_reason_ = "StringConstructor";
+      return false;
+    }
  }
 
  // Given we are calling a method, generate a frame.
@@ -1015,6 +1098,106 @@ bool FastCompilerARM64::CanGenerateCodeFor(ArtField* field, bool can_receiver_be
   return true;
 }
 
+#define DO_CASE(arm_op, op, other) \
+  case arm_op: { \
+    if (constant op other) { \
+      __ B(label); \
+    } \
+    return true; \
+  } \
+
+template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+bool FastCompilerARM64::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
+  DCHECK_EQ(kCompareWithZero ? Instruction::Format::k21t : Instruction::Format::k22t,
+            Instruction::FormatOf(instruction.Opcode()));
+  EnsureHasFrame();
+  int32_t target_offset = kCompareWithZero ? instruction.VRegB_21t() : instruction.VRegC_22t();
+  DCHECK_EQ(target_offset, instruction.GetTargetOffset());
+  if (target_offset < 0) {
+    // TODO: Support for negative branches requires two passes.
+    unimplemented_reason_ = "NegativeBranch";
+    return false;
+  }
+  int32_t register_index = kCompareWithZero ? instruction.VRegA_21t() : instruction.VRegA_22t();
+  vixl::aarch64::Label* label = GetLabelOf(dex_pc + target_offset);
+  Location location = vreg_locations_[register_index];
+
+  if (kCompareWithZero) {
+    // We are going to branch, move all constants to registers to make the merge
+    // point use the same locations.
+    MoveConstantsToRegisters();
+    UpdateMasks(dex_pc + target_offset);
+    if (location.IsConstant()) {
+      DCHECK(location.GetConstant()->IsIntConstant());
+      int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+      switch (kCond) {
+        DO_CASE(vixl::aarch64::eq, ==, 0);
+        DO_CASE(vixl::aarch64::ne, !=, 0);
+        DO_CASE(vixl::aarch64::lt, <, 0);
+        DO_CASE(vixl::aarch64::le, <=, 0);
+        DO_CASE(vixl::aarch64::gt, >, 0);
+        DO_CASE(vixl::aarch64::ge, >=, 0);
+      }
+      return true;
+    } else if (location.IsRegister()) {
+      CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+      switch (kCond) {
+        case vixl::aarch64::eq: {
+          __ Cbz(Register(reg), label);
+          return true;
+        }
+        case vixl::aarch64::ne: {
+          __ Cbnz(Register(reg), label);
+          return true;
+        }
+        default: {
+          __ Cmp(Register(reg), 0);
+          __ B(kCond, label);
+          return true;
+        }
+      }
+    } else {
+      DCHECK(location.IsStackSlot());
+      unimplemented_reason_ = "CompareWithZeroOnStackSlot";
+    }
+    return false;
+  }
+
+  // !kCompareWithZero
+  Location other_location = vreg_locations_[instruction.VRegB_22t()];
+  // We are going to branch, move all constants to registers to make the merge
+  // point use the same locations.
+  MoveConstantsToRegisters();
+  UpdateMasks(dex_pc + target_offset);
+  if (location.IsConstant() && other_location.IsConstant()) {
+    int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+    int32_t other_constant = other_location.GetConstant()->AsIntConstant()->GetValue();
+    switch (kCond) {
+      DO_CASE(vixl::aarch64::eq, ==, other_constant);
+      DO_CASE(vixl::aarch64::ne, !=, other_constant);
+      DO_CASE(vixl::aarch64::lt, <, other_constant);
+      DO_CASE(vixl::aarch64::le, <=, other_constant);
+      DO_CASE(vixl::aarch64::gt, >, other_constant);
+      DO_CASE(vixl::aarch64::ge, >=, other_constant);
    }
+    return true;
+  }
+  // Reload the locations, which can now be registers.
+  location = vreg_locations_[register_index];
+  other_location = vreg_locations_[instruction.VRegB_22t()];
+  if (location.IsRegister() && other_location.IsRegister()) {
+    CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+    CPURegister other_reg = CPURegisterFrom(other_location, DataType::Type::kInt32);
+    __ Cmp(Register(reg), Register(other_reg));
+    __ B(kCond, label);
+    return true;
+  }
+
+  unimplemented_reason_ = "UnimplementedCompare";
+  return false;
+}
+#undef DO_CASE
+
 bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
                                               uint32_t dex_pc,
                                               const Instruction* next) {
@@ -1091,15 +1274,17 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
    }
 
 #define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: break; \
-    case Instruction::IF_##cond##Z: break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
+    case Instruction::IF_##cond: \
+      return If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \
+    case Instruction::IF_##cond##Z: \
+      return If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc);
+
+    IF_XX(vixl::aarch64::eq, EQ);
+    IF_XX(vixl::aarch64::ne, NE);
+    IF_XX(vixl::aarch64::lt, LT);
+    IF_XX(vixl::aarch64::le, LE);
+    IF_XX(vixl::aarch64::gt, GT);
+    IF_XX(vixl::aarch64::ge, GE);
 
    case Instruction::GOTO:
    case Instruction::GOTO_16:
@@ -1114,6 +1299,15 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
      MoveLocation(convention.GetReturnLocation(return_type_),
                   vreg_locations_[register_index],
                   return_type_);
+      if (has_frame_) {
+        // We may have used the "record last instruction before return in return
+        // register" optimization (see `CreateNewRegisterLocation`),
+        // so set the returned register back to a callee save location in case the
+        // method has a frame and there are instructions after this return that
+        // may use this register.
+        uint32_t register_code = kAvailableCalleeSaveRegisters[register_index].GetCode();
+        vreg_locations_[register_index] = Location::RegisterLocation(register_code);
+      }
      DropFrameAndReturn();
      return true;
    }
@@ -1960,13 +2154,16 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
 bool FastCompilerARM64::Compile() {
   if (!InitializeParameters()) {
     DCHECK(HitUnimplemented());
+    AbortCompilation();
     return false;
   }
   if (!ProcessInstructions()) {
     DCHECK(HitUnimplemented());
+    AbortCompilation();
     return false;
   }
   if (HitUnimplemented()) {
+    AbortCompilation();
     return false;
   }
   if (!has_frame_) {
@@ -1978,7 +2175,30 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
                     /* is_debuggable= */ false);
   }
   code_generation_data_->GetStackMapStream()->EndMethod(assembler_.CodeSize());
-  GetVIXLAssembler()->FinalizeCode();
+  assembler_.FinalizeCode();
+
+  if (VLOG_IS_ON(jit)) {
+    // Dump the generated code
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      VLOG(jit) << "Dumping generated fast baseline code for " << method_->PrettyMethod();
+    }
+    FILE* file = tmpfile();
+    MacroAssembler* masm = GetVIXLAssembler();
+    PrintDisassembler print_disasm(file);
+    vixl::aarch64::Instruction* dis_start =
+        masm->GetBuffer()->GetStartAddress<vixl::aarch64::Instruction*>();
+    vixl::aarch64::Instruction* dis_end =
+        masm->GetBuffer()->GetEndAddress<vixl::aarch64::Instruction*>();
+    print_disasm.DisassembleBuffer(dis_start, dis_end);
+    fseek(file, 0L, SEEK_SET);
+    char buffer[1024];
+    const char* line;
+    while ((line = fgets(buffer, sizeof(buffer), file)) != nullptr) {
+      VLOG(jit) << std::string(line);
+    }
+    fclose(file);
+  }
   return true;
 }
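
Editor's note: for readers unfamiliar with the AArch64 side, the new If_21_22t helper above picks a dedicated zero-test branch (cbz/cbnz) for IF_EQZ/IF_NEZ on a register operand and otherwise falls back to cmp followed by a conditional branch. Below is a standalone sketch of that instruction-selection choice only; Cond and EmitCompareWithZero are illustrative names, not VIXL or ART APIs.

// Standalone sketch (illustrative only): choosing between CBZ/CBNZ and
// CMP + B.<cond> for a Dex "if-<cond>z vA, target" when vA lives in a register.
#include <iostream>
#include <string>

enum class Cond { kEq, kNe, kLt, kLe, kGt, kGe };

// Returns the assembly a compiler might emit for "if (reg <cond> 0) goto label".
std::string EmitCompareWithZero(Cond cond, const std::string& reg, const std::string& label) {
  switch (cond) {
    case Cond::kEq: return "cbz " + reg + ", " + label;    // dedicated zero test
    case Cond::kNe: return "cbnz " + reg + ", " + label;   // dedicated non-zero test
    case Cond::kLt: return "cmp " + reg + ", #0\nb.lt " + label;
    case Cond::kLe: return "cmp " + reg + ", #0\nb.le " + label;
    case Cond::kGt: return "cmp " + reg + ", #0\nb.gt " + label;
    case Cond::kGe: return "cmp " + reg + ", #0\nb.ge " + label;
  }
  return "";
}

int main() {
  std::cout << EmitCompareWithZero(Cond::kEq, "w0", "L_target") << "\n";  // cbz w0, L_target
  std::cout << EmitCompareWithZero(Cond::kLt, "w0", "L_target") << "\n";  // cmp w0, #0 / b.lt L_target
  return 0;
}

Constant operands are folded away entirely in the real helper (the DO_CASE macro emits either an unconditional B or nothing), and stack-slot operands still bail out with "CompareWithZeroOnStackSlot".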