author | 2025-02-14 14:35:55 +0000 |
---|---|
committer | 2025-03-03 00:27:46 -0800 |
commit | 31308e27c488a857d950c47148cdf783ea3a8447 (patch) |
tree | 60dce5deb6496f7fdba42001b47a4fb9b3beab78 |
parent | e280e935f1faa066ab8c0e2ab5bf87d064065412 (diff) |
Implement if instructions in fast baseline compiler.
Test: test.py
Change-Id: I481535be7a2269eb2de1f7fdc837a5f9e41666db
-rw-r--r-- | compiler/optimizing/fast_compiler_arm64.cc | 205 |
1 files changed, 195 insertions, 10 deletions
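
In short, the patch lowers the dex `IF_*` and `IF_*Z` comparisons to AArch64 compare-and-branch code through VIXL: `Cbz`/`Cbnz` when testing equality against zero, and `Cmp` followed by a conditional branch otherwise. A rough, stand-alone sketch of that lowering (the helper name `EmitIfZeroSketch`, the free-function form, and the use of `w0` are illustrative assumptions, not code from this patch):

```cpp
#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Hypothetical helper (not part of the patch): branch to `target` when the
// value in w0 satisfies `cond` compared against zero, mirroring how the
// IF_*Z opcodes are lowered below.
void EmitIfZeroSketch(MacroAssembler* masm, Condition cond, Label* target) {
  switch (cond) {
    case eq:
      masm->Cbz(w0, target);   // IF_EQZ: single compare-and-branch-if-zero.
      break;
    case ne:
      masm->Cbnz(w0, target);  // IF_NEZ: compare-and-branch-if-non-zero.
      break;
    default:
      masm->Cmp(w0, 0);        // IF_LTZ/LEZ/GTZ/GEZ: explicit compare...
      masm->B(cond, target);   // ...followed by a conditional branch.
      break;
  }
}
```

In the patch itself this logic lives in the templated `If_21_22t` helper, which additionally folds comparisons between constants and rejects operands still living in stack slots.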
diff --git a/compiler/optimizing/fast_compiler_arm64.cc b/compiler/optimizing/fast_compiler_arm64.cc
index 4ce4f45bee..f8cb2c7fe5 100644
--- a/compiler/optimizing/fast_compiler_arm64.cc
+++ b/compiler/optimizing/fast_compiler_arm64.cc
@@ -33,6 +33,7 @@
 #pragma GCC diagnostic ignored "-Wshadow"
 #include "aarch64/disasm-aarch64.h"
 #include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
 #pragma GCC diagnostic pop
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
@@ -119,6 +120,8 @@ class FastCompilerARM64 : public FastCompiler {
         code_generation_data_(CodeGenerationData::Create(arena_stack, InstructionSet::kArm64)),
         vreg_locations_(dex_compilation_unit.GetCodeItemAccessor().RegistersSize(),
                         allocator->Adapter()),
+        branch_targets_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+                        allocator->Adapter()),
         has_frame_(false),
         core_spill_mask_(0u),
         fpu_spill_mask_(0u),
@@ -200,6 +203,10 @@ class FastCompilerARM64 : public FastCompiler {
   // Return the existing register location for `reg`.
   Location GetExistingRegisterLocation(uint32_t reg, DataType::Type type);
 
+  // Move dex registers holding constants into physical registers. Used when
+  // branching.
+  void MoveConstantsToRegisters();
+
   // Generate code for one instruction.
   bool ProcessDexInstruction(const Instruction& instruction,
                              uint32_t dex_pc,
@@ -214,6 +221,10 @@ class FastCompilerARM64 : public FastCompiler {
   // Generate code for doing a Java invoke.
   bool HandleInvoke(const Instruction& instruction, uint32_t dex_pc, InvokeType invoke_type);
 
+  // Generate code for IF_* instructions.
+  template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+  bool If_21_22t(const Instruction& instruction, uint32_t dex_pc);
+
   // Generate code for doing a runtime invoke.
   void InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc);
 
@@ -257,6 +268,21 @@ class FastCompilerARM64 : public FastCompiler {
     return !(is_non_null_mask_ & (1 << vreg_index));
   }
 
+  // Get the label associated with the given `dex_pc`.
+  vixl::aarch64::Label* GetLabelOf(uint32_t dex_pc) {
+    return &branch_targets_[dex_pc];
+  }
+
+  // If we need to abort compilation, clear branch targets, required by vixl.
+  void AbortCompilation() {
+    for (vixl::aarch64::Label& label : branch_targets_) {
+      if (label.IsLinked()) {
+        __ Bind(&label);
+      }
+    }
+  }
+
+  // Compiler utilities.
   //
   Arm64Assembler* GetAssembler() { return &assembler_; }
@@ -302,6 +328,9 @@ class FastCompilerARM64 : public FastCompiler {
   // The current location of each dex register.
   ArenaVector<Location> vreg_locations_;
 
+  // A vector of size code units for dex pcs that are branch targets.
+  ArenaVector<vixl::aarch64::Label> branch_targets_;
+
   // Whether we've created a frame for this compiled method.
   bool has_frame_;
@@ -374,6 +403,17 @@ bool FastCompilerARM64::InitializeParameters() {
   return true;
 }
 
+void FastCompilerARM64::MoveConstantsToRegisters() {
+  for (uint32_t i = 0; i < vreg_locations_.size(); ++i) {
+    Location location = vreg_locations_[i];
+    if (location.IsConstant()) {
+      vreg_locations_[i] =
+          CreateNewRegisterLocation(i, DataType::Type::kInt32, /* next= */ nullptr);
+      MoveLocation(vreg_locations_[i], location, DataType::Type::kInt32);
+    }
+  }
+}
+
 bool FastCompilerARM64::ProcessInstructions() {
   DCHECK(GetCodeItemAccessor().HasCodeItem());
@@ -391,6 +431,11 @@ bool FastCompilerARM64::ProcessInstructions() {
       const DexInstructionPcPair& next_pair = *it;
       next = &next_pair.Inst();
     }
+    vixl::aarch64::Label* label = GetLabelOf(pair.DexPc());
+    if (label->IsLinked()) {
+      MoveConstantsToRegisters();
+      __ Bind(label);
+    }
 
     if (!ProcessDexInstruction(pair.Inst(), pair.DexPc(), next)) {
       DCHECK(HitUnimplemented());
@@ -776,6 +821,11 @@ bool FastCompilerARM64::HandleInvoke(const Instruction& instruction,
     } else if (invoke_type == kInterface) {
       offset = resolved_method->GetImtIndex();
     }
+
+    if (resolved_method->IsStringConstructor()) {
+      unimplemented_reason_ = "StringConstructor";
+      return false;
+    }
   }
 
   // Given we are calling a method, generate a frame.
@@ -1015,6 +1065,104 @@ bool FastCompilerARM64::CanGenerateCodeFor(ArtField* field, bool can_receiver_be
   return true;
 }
 
+#define DO_CASE(arm_op, op, other) \
+  case arm_op: { \
+    if (constant op other) { \
+      __ B(label); \
+    } \
+    return true; \
+  } \
+
+template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+bool FastCompilerARM64::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
+  DCHECK_EQ(kCompareWithZero ? Instruction::Format::k21t : Instruction::Format::k22t,
+            Instruction::FormatOf(instruction.Opcode()));
+  EnsureHasFrame();
+  int32_t target_offset = kCompareWithZero ? instruction.VRegB_21t() : instruction.VRegC_22t();
+  DCHECK_EQ(target_offset, instruction.GetTargetOffset());
+  if (target_offset < 0) {
+    // TODO: Support for negative branches requires two passes.
+    unimplemented_reason_ = "NegativeBranch";
+    return false;
+  }
+  int32_t register_index = kCompareWithZero ? instruction.VRegA_21t() : instruction.VRegA_22t();
+  vixl::aarch64::Label* label = GetLabelOf(dex_pc + target_offset);
+  Location location = vreg_locations_[register_index];
+
+  if (kCompareWithZero) {
+    // We are going to branch, move all constants to registers to make the merge
+    // point use the same locations.
+    MoveConstantsToRegisters();
+    if (location.IsConstant()) {
+      DCHECK(location.GetConstant()->IsIntConstant());
+      int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+      switch (kCond) {
+        DO_CASE(vixl::aarch64::eq, ==, 0);
+        DO_CASE(vixl::aarch64::ne, !=, 0);
+        DO_CASE(vixl::aarch64::lt, <, 0);
+        DO_CASE(vixl::aarch64::le, <=, 0);
+        DO_CASE(vixl::aarch64::gt, >, 0);
+        DO_CASE(vixl::aarch64::ge, >=, 0);
+      }
+      return true;
+    } else if (location.IsRegister()) {
+      CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+      switch (kCond) {
+        case vixl::aarch64::eq: {
+          __ Cbz(Register(reg), label);
+          return true;
+        }
+        case vixl::aarch64::ne: {
+          __ Cbnz(Register(reg), label);
+          return true;
+        }
+        default: {
+          __ Cmp(Register(reg), 0);
+          __ B(kCond, label);
+          return true;
+        }
+      }
+    } else {
+      DCHECK(location.IsStackSlot());
+      unimplemented_reason_ = "CompareWithZeroOnStackSlot";
+    }
+    return false;
+  }
+
+  // !kCompareWithZero
+  Location other_location = vreg_locations_[instruction.VRegB_22t()];
+  // We are going to branch, move all constants to registers to make the merge
+  // point use the same locations.
+  MoveConstantsToRegisters();
+  if (location.IsConstant() && other_location.IsConstant()) {
+    int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+    int32_t other_constant = other_location.GetConstant()->AsIntConstant()->GetValue();
+    switch (kCond) {
+      DO_CASE(vixl::aarch64::eq, ==, other_constant);
+      DO_CASE(vixl::aarch64::ne, !=, other_constant);
+      DO_CASE(vixl::aarch64::lt, <, other_constant);
+      DO_CASE(vixl::aarch64::le, <=, other_constant);
+      DO_CASE(vixl::aarch64::gt, >, other_constant);
+      DO_CASE(vixl::aarch64::ge, >=, other_constant);
    }
+    return true;
+  }
+  // Reload the locations, which can now be registers.
+  location = vreg_locations_[register_index];
+  other_location = vreg_locations_[instruction.VRegB_22t()];
+  if (location.IsRegister() && other_location.IsRegister()) {
+    CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+    CPURegister other_reg = CPURegisterFrom(other_location, DataType::Type::kInt32);
+    __ Cmp(Register(reg), Register(other_reg));
+    __ B(kCond, label);
+    return true;
+  }
+
+  unimplemented_reason_ = "UnimplementedCompare";
+  return false;
+}
+#undef DO_CASE
+
 bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
                                               uint32_t dex_pc,
                                               const Instruction* next) {
@@ -1091,15 +1239,17 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
   }
 
 #define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: break; \
-    case Instruction::IF_##cond##Z: break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
+    case Instruction::IF_##cond: \
+      return If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \
+    case Instruction::IF_##cond##Z: \
+      return If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc);
+
+    IF_XX(vixl::aarch64::eq, EQ);
+    IF_XX(vixl::aarch64::ne, NE);
+    IF_XX(vixl::aarch64::lt, LT);
+    IF_XX(vixl::aarch64::le, LE);
+    IF_XX(vixl::aarch64::gt, GT);
+    IF_XX(vixl::aarch64::ge, GE);
 
     case Instruction::GOTO:
     case Instruction::GOTO_16:
@@ -1114,6 +1264,15 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
       MoveLocation(convention.GetReturnLocation(return_type_),
                    vreg_locations_[register_index],
                    return_type_);
+      if (has_frame_) {
+        // We may have used the "record last instruction before return in return
+        // register" optimization (see `CreateNewRegisterLocation`),
+        // so set the returned register back to a callee save location in case the
+        // method has a frame and there are instructions after this return that
+        // may use this register.
+        uint32_t register_code = kAvailableCalleeSaveRegisters[register_index].GetCode();
+        vreg_locations_[register_index] = Location::RegisterLocation(register_code);
+      }
       DropFrameAndReturn();
       return true;
     }
@@ -1960,13 +2119,16 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
 bool FastCompilerARM64::Compile() {
   if (!InitializeParameters()) {
     DCHECK(HitUnimplemented());
+    AbortCompilation();
     return false;
   }
   if (!ProcessInstructions()) {
     DCHECK(HitUnimplemented());
+    AbortCompilation();
     return false;
   }
   if (HitUnimplemented()) {
+    AbortCompilation();
     return false;
   }
   if (!has_frame_) {
@@ -1978,7 +2140,30 @@
     /* is_debuggable= */ false);
   }
   code_generation_data_->GetStackMapStream()->EndMethod(assembler_.CodeSize());
-  GetVIXLAssembler()->FinalizeCode();
+  assembler_.FinalizeCode();
+
+  if (VLOG_IS_ON(jit)) {
+    // Dump the generated code
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      VLOG(jit) << "Dumping generated fast baseline code for " << method_->PrettyMethod();
+    }
+    FILE* file = tmpfile();
+    MacroAssembler* masm = GetVIXLAssembler();
+    PrintDisassembler print_disasm(file);
+    vixl::aarch64::Instruction* dis_start =
+        masm->GetBuffer()->GetStartAddress<vixl::aarch64::Instruction*>();
+    vixl::aarch64::Instruction* dis_end =
+        masm->GetBuffer()->GetEndAddress<vixl::aarch64::Instruction*>();
+    print_disasm.DisassembleBuffer(dis_start, dis_end);
+    fseek(file, 0L, SEEK_SET);
+    char buffer[1024];
+    const char* line;
+    while ((line = fgets(buffer, sizeof(buffer), file)) != nullptr) {
+      VLOG(jit) << std::string(line);
+    }
+    fclose(file);
+  }
 
   return true;
 }
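
A note on the new `branch_targets_` vector: every dex pc gets a VIXL `Label`, a forward branch links the label when the `IF_*` instruction is emitted, and `ProcessInstructions()` binds it once the target dex pc is reached. `AbortCompilation()` binds any leftover linked labels because, as the patch's comment says, VIXL requires a label that has been linked to also be bound before the assembler goes away. A minimal, stand-alone sketch of that forward-branch pattern (an illustrative example, not code from the patch):

```cpp
#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Forward-branch pattern: the Label is linked by Cbz() before it is bound.
// A label that was linked but never bound trips a VIXL assertion on teardown,
// which is what AbortCompilation() guards against when compilation bails out.
void ForwardBranchSketch() {
  MacroAssembler masm;
  Label target;            // Unbound label standing in for a later dex pc.
  masm.Cbz(w0, &target);   // Links `target`; the offset is patched on Bind().
  masm.Mov(w0, 1);         // Fall-through path.
  masm.Bind(&target);      // Target reached: resolve the pending branch.
  masm.Ret();
  masm.FinalizeCode();
}
```

Backward branches (negative target offsets) are still rejected with `"NegativeBranch"`: resolving them in this single forward pass would require the label to be bound before the branch is emitted, which the TODO in `If_21_22t` defers to a two-pass scheme.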