author Nicolas Geoffray <ngeoffray@google.com> 2025-03-11 02:36:54 -0700
committer Nicolas Geoffray <ngeoffray@google.com> 2025-03-17 03:22:31 -0700
commit 49189d0836f45cc2115ca57c0cf2a471706ed231 (patch)
tree 342c983000ed3f833a2eaff7e9941b3d50228c2b /compiler/optimizing/fast_compiler_arm64.cc
parent 38a1949da3b69443cf1d88b3dedaccc7c5fe2c90 (diff)
Reland "Implement if instructions in fast baseline compiler."
This reverts commit ee8ab3125768bbf60347c507103a910553acd158.

Reason for revert: Fix logic around branching.

Change-Id: Ifadcb75488fe25dd612451c5813ea797b9da3b5c
Diffstat (limited to 'compiler/optimizing/fast_compiler_arm64.cc')
-rw-r--r--  compiler/optimizing/fast_compiler_arm64.cc | 240
1 file changed, 230 insertions(+), 10 deletions(-)
diff --git a/compiler/optimizing/fast_compiler_arm64.cc b/compiler/optimizing/fast_compiler_arm64.cc
index 4ce4f45bee..41b0a156d1 100644
--- a/compiler/optimizing/fast_compiler_arm64.cc
+++ b/compiler/optimizing/fast_compiler_arm64.cc
@@ -33,6 +33,7 @@
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
#pragma GCC diagnostic pop
using namespace vixl::aarch64; // NOLINT(build/namespaces)
@@ -119,11 +120,19 @@ class FastCompilerARM64 : public FastCompiler {
code_generation_data_(CodeGenerationData::Create(arena_stack, InstructionSet::kArm64)),
vreg_locations_(dex_compilation_unit.GetCodeItemAccessor().RegistersSize(),
allocator->Adapter()),
+ branch_targets_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+ allocator->Adapter()),
+ object_register_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+ allocator->Adapter()),
+ is_non_null_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
+ allocator->Adapter()),
has_frame_(false),
core_spill_mask_(0u),
fpu_spill_mask_(0u),
object_register_mask_(0u),
is_non_null_mask_(0u) {
+ memset(is_non_null_masks_.data(), ~0, is_non_null_masks_.size() * sizeof(uint64_t));
+ memset(object_register_masks_.data(), ~0, object_register_masks_.size() * sizeof(uint64_t));
GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
}
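
Side note on the two memsets above: the per-dex-pc masks are combined with `&=` on every incoming branch edge, so they must start as all-ones for the intersection to come out right. A minimal standalone sketch (plain C++, not ART code; the values are made up) of that merge behaviour:

#include <cstdint>
#include <vector>

int main() {
  // One mask slot per dex pc, initialized to all-ones like the memset above.
  std::vector<uint64_t> is_non_null_masks(4, ~uint64_t{0});
  uint64_t edge_a = 0b1010;  // vregs known non-null when branching from edge A
  uint64_t edge_b = 0b0011;  // vregs known non-null when branching from edge B
  is_non_null_masks[2] &= edge_a;  // what UpdateMasks does per incoming edge
  is_non_null_masks[2] &= edge_b;
  // is_non_null_masks[2] == 0b0010: only facts true on every edge survive.
  // Starting from 0 instead of ~0 would discard everything at the first merge.
  return 0;
}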
@@ -200,6 +209,14 @@ class FastCompilerARM64 : public FastCompiler {
// Return the existing register location for `reg`.
Location GetExistingRegisterLocation(uint32_t reg, DataType::Type type);
+ // Move dex registers holding constants into physical registers. Used when
+ // branching.
+ void MoveConstantsToRegisters();
+
+ // Update the masks associated with the given dex_pc. Used when dex_pc is a
+ // branch target.
+ void UpdateMasks(uint32_t dex_pc);
+
// Generate code for one instruction.
bool ProcessDexInstruction(const Instruction& instruction,
uint32_t dex_pc,
@@ -214,6 +231,10 @@ class FastCompilerARM64 : public FastCompiler {
// Generate code for doing a Java invoke.
bool HandleInvoke(const Instruction& instruction, uint32_t dex_pc, InvokeType invoke_type);
+ // Generate code for IF_* instructions.
+ template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+ bool If_21_22t(const Instruction& instruction, uint32_t dex_pc);
+
// Generate code for doing a runtime invoke.
void InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc);
@@ -257,6 +278,21 @@ class FastCompilerARM64 : public FastCompiler {
return !(is_non_null_mask_ & (1 << vreg_index));
}
+ // Get the label associated with the given `dex_pc`.
+ vixl::aarch64::Label* GetLabelOf(uint32_t dex_pc) {
+ return &branch_targets_[dex_pc];
+ }
+
+ // If we need to abort compilation, bind any linked branch target labels, as required by vixl.
+ void AbortCompilation() {
+ for (vixl::aarch64::Label& label : branch_targets_) {
+ if (label.IsLinked()) {
+ __ Bind(&label);
+ }
+ }
+ }
+
+
// Compiler utilities.
//
Arm64Assembler* GetAssembler() { return &assembler_; }
@@ -302,6 +338,17 @@ class FastCompilerARM64 : public FastCompiler {
// The current location of each dex register.
ArenaVector<Location> vreg_locations_;
+ // A vector sized to the number of code units, with a label for each dex pc that is a branch target.
+ ArenaVector<vixl::aarch64::Label> branch_targets_;
+
+ // For dex pcs that are branch targets, the register mask that will be used at
+ // the point of that pc.
+ ArenaVector<uint64_t> object_register_masks_;
+
+ // For dex pcs that are branch targets, the mask for non-null objects that will
+ // be used at the point of that pc.
+ ArenaVector<uint64_t> is_non_null_masks_;
+
// Whether we've created a frame for this compiled method.
bool has_frame_;
@@ -374,6 +421,22 @@ bool FastCompilerARM64::InitializeParameters() {
return true;
}
+void FastCompilerARM64::MoveConstantsToRegisters() {
+ for (uint32_t i = 0; i < vreg_locations_.size(); ++i) {
+ Location location = vreg_locations_[i];
+ if (location.IsConstant()) {
+ vreg_locations_[i] =
+ CreateNewRegisterLocation(i, DataType::Type::kInt32, /* next= */ nullptr);
+ MoveLocation(vreg_locations_[i], location, DataType::Type::kInt32);
+ }
+ }
+}
+
+void FastCompilerARM64::UpdateMasks(uint32_t dex_pc) {
+ object_register_masks_[dex_pc] &= object_register_mask_;
+ is_non_null_masks_[dex_pc] &= is_non_null_mask_;
+}
+
bool FastCompilerARM64::ProcessInstructions() {
DCHECK(GetCodeItemAccessor().HasCodeItem());
@@ -390,6 +453,21 @@ bool FastCompilerARM64::ProcessInstructions() {
if (it != end) {
const DexInstructionPcPair& next_pair = *it;
next = &next_pair.Inst();
+ if (GetLabelOf(next_pair.DexPc())->IsLinked()) {
+ // Disable the micro-optimization, as the next instruction is a branch
+ // target.
+ next = nullptr;
+ }
+ }
+ vixl::aarch64::Label* label = GetLabelOf(pair.DexPc());
+ if (label->IsLinked()) {
+ // Emulate a branch to this pc.
+ MoveConstantsToRegisters();
+ UpdateMasks(pair.DexPc());
+ // Set new masks based on all incoming edges.
+ is_non_null_mask_ = is_non_null_masks_[pair.DexPc()];
+ object_register_mask_ = object_register_masks_[pair.DexPc()];
+ __ Bind(label);
}
if (!ProcessDexInstruction(pair.Inst(), pair.DexPc(), next)) {
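
Stripped of the mask bookkeeping, the shape this hunk produces is the usual forward-branch pattern: the IF_* instruction links a per-dex-pc label, and the straight-line walk binds it once it reaches that pc. A hedged VIXL-only sketch (hypothetical helper, not ART code):

#include "aarch64/macro-assembler-aarch64.h"

// Demo only: compiles something like "if-eqz v0, :target; ...; :target; return".
void ForwardBranchDemo(vixl::aarch64::MacroAssembler* masm,
                       const vixl::aarch64::Register& v0) {
  vixl::aarch64::Label target;   // label for the branch-target dex pc
  masm->Cbz(v0, &target);        // the IF_* instruction links the label
  masm->Mov(v0, 1);              // fall-through code
  masm->Bind(&target);           // walk reaches the target pc: bind it here
  masm->Ret();
}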
@@ -776,6 +854,11 @@ bool FastCompilerARM64::HandleInvoke(const Instruction& instruction,
} else if (invoke_type == kInterface) {
offset = resolved_method->GetImtIndex();
}
+
+ if (resolved_method->IsStringConstructor()) {
+ unimplemented_reason_ = "StringConstructor";
+ return false;
+ }
}
// Given we are calling a method, generate a frame.
@@ -1015,6 +1098,106 @@ bool FastCompilerARM64::CanGenerateCodeFor(ArtField* field, bool can_receiver_be
return true;
}
+#define DO_CASE(arm_op, op, other) \
+ case arm_op: { \
+ if (constant op other) { \
+ __ B(label); \
+ } \
+ return true; \
+ } \
+
+template<vixl::aarch64::Condition kCond, bool kCompareWithZero>
+bool FastCompilerARM64::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
+ DCHECK_EQ(kCompareWithZero ? Instruction::Format::k21t : Instruction::Format::k22t,
+ Instruction::FormatOf(instruction.Opcode()));
+ EnsureHasFrame();
+ int32_t target_offset = kCompareWithZero ? instruction.VRegB_21t() : instruction.VRegC_22t();
+ DCHECK_EQ(target_offset, instruction.GetTargetOffset());
+ if (target_offset < 0) {
+ // TODO: Support for negative branches requires two passes.
+ unimplemented_reason_ = "NegativeBranch";
+ return false;
+ }
+ int32_t register_index = kCompareWithZero ? instruction.VRegA_21t() : instruction.VRegA_22t();
+ vixl::aarch64::Label* label = GetLabelOf(dex_pc + target_offset);
+ Location location = vreg_locations_[register_index];
+
+ if (kCompareWithZero) {
+ // We are going to branch, move all constants to registers to make the merge
+ // point use the same locations.
+ MoveConstantsToRegisters();
+ UpdateMasks(dex_pc + target_offset);
+ if (location.IsConstant()) {
+ DCHECK(location.GetConstant()->IsIntConstant());
+ int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+ switch (kCond) {
+ DO_CASE(vixl::aarch64::eq, ==, 0);
+ DO_CASE(vixl::aarch64::ne, !=, 0);
+ DO_CASE(vixl::aarch64::lt, <, 0);
+ DO_CASE(vixl::aarch64::le, <=, 0);
+ DO_CASE(vixl::aarch64::gt, >, 0);
+ DO_CASE(vixl::aarch64::ge, >=, 0);
+ }
+ return true;
+ } else if (location.IsRegister()) {
+ CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+ switch (kCond) {
+ case vixl::aarch64::eq: {
+ __ Cbz(Register(reg), label);
+ return true;
+ }
+ case vixl::aarch64::ne: {
+ __ Cbnz(Register(reg), label);
+ return true;
+ }
+ default: {
+ __ Cmp(Register(reg), 0);
+ __ B(kCond, label);
+ return true;
+ }
+ }
+ } else {
+ DCHECK(location.IsStackSlot());
+ unimplemented_reason_ = "CompareWithZeroOnStackSlot";
+ }
+ return false;
+ }
+
+ // !kCompareWithZero
+ Location other_location = vreg_locations_[instruction.VRegB_22t()];
+ // We are going to branch, move all constants to registers to make the merge
+ // point use the same locations.
+ MoveConstantsToRegisters();
+ UpdateMasks(dex_pc + target_offset);
+ if (location.IsConstant() && other_location.IsConstant()) {
+ int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
+ int32_t other_constant = other_location.GetConstant()->AsIntConstant()->GetValue();
+ switch (kCond) {
+ DO_CASE(vixl::aarch64::eq, ==, other_constant);
+ DO_CASE(vixl::aarch64::ne, !=, other_constant);
+ DO_CASE(vixl::aarch64::lt, <, other_constant);
+ DO_CASE(vixl::aarch64::le, <=, other_constant);
+ DO_CASE(vixl::aarch64::gt, >, other_constant);
+ DO_CASE(vixl::aarch64::ge, >=, other_constant);
+ }
+ return true;
+ }
+ // Reload the locations, which can now be registers.
+ location = vreg_locations_[register_index];
+ other_location = vreg_locations_[instruction.VRegB_22t()];
+ if (location.IsRegister() && other_location.IsRegister()) {
+ CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
+ CPURegister other_reg = CPURegisterFrom(other_location, DataType::Type::kInt32);
+ __ Cmp(Register(reg), Register(other_reg));
+ __ B(kCond, label);
+ return true;
+ }
+
+ unimplemented_reason_ = "UnimplementedCompare";
+ return false;
+}
+#undef DO_CASE
+
bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
uint32_t dex_pc,
const Instruction* next) {
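
In terms of emitted arm64, the register path in If_21_22t picks cbz/cbnz for the if-eqz/if-nez forms and falls back to cmp plus a conditional branch for every other condition. A hedged sketch of that selection using raw VIXL calls (hypothetical helpers, not the ART wrappers):

#include "aarch64/macro-assembler-aarch64.h"

void EmitIfEqz(vixl::aarch64::MacroAssembler* masm,
               const vixl::aarch64::Register& va,
               vixl::aarch64::Label* target) {
  masm->Cbz(va, target);                    // if-eqz vA, :target
}

void EmitIfLt(vixl::aarch64::MacroAssembler* masm,
              const vixl::aarch64::Register& va,
              const vixl::aarch64::Register& vb,
              vixl::aarch64::Label* target) {
  masm->Cmp(va, vb);                        // set flags from vA - vB
  masm->B(vixl::aarch64::lt, target);       // if-lt vA, vB, :target
}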
@@ -1091,15 +1274,17 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
}
#define IF_XX(comparison, cond) \
- case Instruction::IF_##cond: break; \
- case Instruction::IF_##cond##Z: break
-
- IF_XX(HEqual, EQ);
- IF_XX(HNotEqual, NE);
- IF_XX(HLessThan, LT);
- IF_XX(HLessThanOrEqual, LE);
- IF_XX(HGreaterThan, GT);
- IF_XX(HGreaterThanOrEqual, GE);
+ case Instruction::IF_##cond: \
+ return If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \
+ case Instruction::IF_##cond##Z: \
+ return If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc);
+
+ IF_XX(vixl::aarch64::eq, EQ);
+ IF_XX(vixl::aarch64::ne, NE);
+ IF_XX(vixl::aarch64::lt, LT);
+ IF_XX(vixl::aarch64::le, LE);
+ IF_XX(vixl::aarch64::gt, GT);
+ IF_XX(vixl::aarch64::ge, GE);
case Instruction::GOTO:
case Instruction::GOTO_16:
@@ -1114,6 +1299,15 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
MoveLocation(convention.GetReturnLocation(return_type_),
vreg_locations_[register_index],
return_type_);
+ if (has_frame_) {
+ // We may have used the "record last instruction before return in return
+ // register" optimization (see `CreateNewRegisterLocation`),
+ // so set the returned register back to a callee save location in case the
+ // method has a frame and there are instructions after this return that
+ // may use this register.
+ uint32_t register_code = kAvailableCalleeSaveRegisters[register_index].GetCode();
+ vreg_locations_[register_index] = Location::RegisterLocation(register_code);
+ }
DropFrameAndReturn();
return true;
}
@@ -1960,13 +2154,16 @@ bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
bool FastCompilerARM64::Compile() {
if (!InitializeParameters()) {
DCHECK(HitUnimplemented());
+ AbortCompilation();
return false;
}
if (!ProcessInstructions()) {
DCHECK(HitUnimplemented());
+ AbortCompilation();
return false;
}
if (HitUnimplemented()) {
+ AbortCompilation();
return false;
}
if (!has_frame_) {
@@ -1978,7 +2175,30 @@ bool FastCompilerARM64::Compile() {
/* is_debuggable= */ false);
}
code_generation_data_->GetStackMapStream()->EndMethod(assembler_.CodeSize());
- GetVIXLAssembler()->FinalizeCode();
+ assembler_.FinalizeCode();
+
+ if (VLOG_IS_ON(jit)) {
+ // Dump the generated code
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ VLOG(jit) << "Dumping generated fast baseline code for " << method_->PrettyMethod();
+ }
+ FILE* file = tmpfile();
+ MacroAssembler* masm = GetVIXLAssembler();
+ PrintDisassembler print_disasm(file);
+ vixl::aarch64::Instruction* dis_start =
+ masm->GetBuffer()->GetStartAddress<vixl::aarch64::Instruction*>();
+ vixl::aarch64::Instruction* dis_end =
+ masm->GetBuffer()->GetEndAddress<vixl::aarch64::Instruction*>();
+ print_disasm.DisassembleBuffer(dis_start, dis_end);
+ fseek(file, 0L, SEEK_SET);
+ char buffer[1024];
+ const char* line;
+ while ((line = fgets(buffer, sizeof(buffer), file)) != nullptr) {
+ VLOG(jit) << std::string(line);
+ }
+ fclose(file);
+ }
return true;
}
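
For reference, the same PrintDisassembler pattern works outside ART against a bare VIXL MacroAssembler; a minimal sketch (assumes a local VIXL checkout, and prints to stdout instead of a tmpfile):

#include <cstdio>
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"

int main() {
  using namespace vixl::aarch64;  // NOLINT(build/namespaces)
  MacroAssembler masm;
  masm.Mov(x0, 42);
  masm.Ret();
  masm.FinalizeCode();
  // Disassemble everything between the start and end of the code buffer.
  PrintDisassembler disasm(stdout);
  disasm.DisassembleBuffer(masm.GetBuffer()->GetStartAddress<Instruction*>(),
                           masm.GetBuffer()->GetEndAddress<Instruction*>());
  return 0;
}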