/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fast_compiler.h"

// TODO(VIXL): Make VIXL compile cleanly with -Wshadow, -Wdeprecated-declarations.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

#include "code_generation_data.h"
#include "code_generator_arm64.h"
#include "data_type-inl.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"
#include "driver/dex_compilation_unit.h"
#include "entrypoints/entrypoint_utils-inl.h"
#include "jit_patches_arm64.h"
#include "nodes.h"
#include "thread-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif
#define __ GetVIXLAssembler()->

namespace art HIDDEN {
namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::RegisterFrom;
using helpers::WRegisterFrom;
using helpers::DRegisterFrom;
using helpers::SRegisterFrom;

static const vixl::aarch64::Register kAvailableCalleeSaveRegisters[] = {
    vixl::aarch64::x22, vixl::aarch64::x23, vixl::aarch64::x24, vixl::aarch64::x25,
    vixl::aarch64::x26, vixl::aarch64::x27, vixl::aarch64::x28, vixl::aarch64::x29,
};

static const vixl::aarch64::Register kAvailableTempRegisters[] = {
    vixl::aarch64::x8, vixl::aarch64::x9, vixl::aarch64::x10, vixl::aarch64::x11,
    vixl::aarch64::x12, vixl::aarch64::x13, vixl::aarch64::x14, vixl::aarch64::x15,
};

static const vixl::aarch64::VRegister kAvailableCalleeSaveFpuRegisters[] = {
    vixl::aarch64::d8, vixl::aarch64::d9, vixl::aarch64::d10, vixl::aarch64::d11,
    vixl::aarch64::d12, vixl::aarch64::d13, vixl::aarch64::d14, vixl::aarch64::d15,
};

static const vixl::aarch64::VRegister kAvailableTempFpuRegisters[] = {
    vixl::aarch64::d0, vixl::aarch64::d1, vixl::aarch64::d2, vixl::aarch64::d3,
    vixl::aarch64::d4, vixl::aarch64::d5, vixl::aarch64::d6, vixl::aarch64::d7,
};

class FastCompilerARM64 : public FastCompiler {
 public:
  FastCompilerARM64(ArtMethod* method,
                    ArenaAllocator* allocator,
                    ArenaStack* arena_stack,
                    VariableSizedHandleScope* handles,
                    const CompilerOptions& compiler_options,
                    const DexCompilationUnit& dex_compilation_unit)
      : method_(method),
        allocator_(allocator),
        handles_(handles),
        assembler_(allocator,
                   compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
        jit_patches_(&assembler_, allocator),
        compiler_options_(compiler_options),
        dex_compilation_unit_(dex_compilation_unit),
        code_generation_data_(CodeGenerationData::Create(arena_stack, InstructionSet::kArm64)),
        vreg_locations_(dex_compilation_unit.GetCodeItemAccessor().RegistersSize(),
                        allocator->Adapter()),
        branch_targets_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
                        allocator->Adapter()),
        object_register_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
                               allocator->Adapter()),
        is_non_null_masks_(dex_compilation_unit.GetCodeItemAccessor().InsnsSizeInCodeUnits(),
                           allocator->Adapter()),
        has_frame_(false),
        core_spill_mask_(0u),
        fpu_spill_mask_(0u),
        object_register_mask_(0u),
        is_non_null_mask_(0u) {
    memset(is_non_null_masks_.data(), ~0, is_non_null_masks_.size() * sizeof(uint64_t));
    memset(object_register_masks_.data(), ~0, object_register_masks_.size() * sizeof(uint64_t));
    GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
  }

  // Top-level method to generate code for `method_`.
  bool Compile();

  ArrayRef<const uint8_t> GetCode() const override {
    return ArrayRef<const uint8_t>(assembler_.CodeBufferBaseAddress(), assembler_.CodeSize());
  }

  ScopedArenaVector<uint8_t> BuildStackMaps() const override {
    return code_generation_data_->GetStackMapStream()->Encode();
  }

  ArrayRef<const uint8_t> GetCfiData() const override {
    return ArrayRef<const uint8_t>(*assembler_.cfi().data());
  }

  int32_t GetFrameSize() const override {
    if (!has_frame_) {
      return 0;
    }
    size_t size = FrameEntrySpillSize() +
        /* method */ static_cast<size_t>(kArm64PointerSize) +
        /* out registers */ GetCodeItemAccessor().OutsSize() * kVRegSize;
    return RoundUp(size, kStackAlignment);
  }

  uint32_t GetNumberOfJitRoots() const override {
    return code_generation_data_->GetNumberOfJitRoots();
  }

  void EmitJitRoots(uint8_t* code,
                    const uint8_t* roots_data,
                    /*out*/ std::vector<Handle<mirror::Object>>* roots) override
      REQUIRES_SHARED(Locks::mutator_lock_) {
    code_generation_data_->EmitJitRoots(roots);
    jit_patches_.EmitJitRootPatches(code, roots_data, *code_generation_data_);
  }

  ~FastCompilerARM64() override {
    GetVIXLAssembler()->Reset();
  }

  const char* GetUnimplementedReason() const {
    return unimplemented_reason_;
  }

 private:
  // Go over each instruction of the method, and generate code for them.
  bool ProcessInstructions();

  // Initialize the locations of parameters for this method.
  bool InitializeParameters();

  // Generate code for the frame entry. Only called when needed. If the frame
  // entry has already been generated, do nothing.
  bool EnsureHasFrame();

  // Generate code for a frame exit.
  void PopFrameAndReturn();

  // Record a stack map at the given dex_pc.
  void RecordPcInfo(uint32_t dex_pc);

  // Generate code to move from one location to another.
  bool MoveLocation(Location destination, Location source, DataType::Type dst_type);

  // Get a register location for the dex register `reg`. Saves the location into
  // `vreg_locations_` for next uses of `reg`.
  // `next` should be the next dex instruction, to help choose the register.
  Location CreateNewRegisterLocation(uint32_t reg, DataType::Type type, const Instruction* next);

  // Return the existing register location for `reg`.
  Location GetExistingRegisterLocation(uint32_t reg, DataType::Type type);

  // Move dex registers holding constants into physical registers. Used when
  // branching.
  void MoveConstantsToRegisters();

  // Update the masks associated to the given dex_pc. Used when dex_pc is a
  // branch target.
  void UpdateMasks(uint32_t dex_pc);

  // Generate code for one instruction.
  bool ProcessDexInstruction(const Instruction& instruction,
                             uint32_t dex_pc,
                             const Instruction* next);

  // Setup the arguments for an invoke.
  bool SetupArguments(InvokeType invoke_type,
                      const InstructionOperands& operands,
                      const char* shorty,
                      /* out */ uint32_t* obj_reg);

  // Generate code for doing a Java invoke.
  bool HandleInvoke(const Instruction& instruction, uint32_t dex_pc, InvokeType invoke_type);

  // Generate code for IF_* instructions.
  template <vixl::aarch64::Condition kCond, bool kCompareWithZero>
  bool If_21_22t(const Instruction& instruction, uint32_t dex_pc);

  // Generate code for doing a runtime invoke.
  void InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc);

  bool BuildLoadString(uint32_t vreg, dex::StringIndex string_index, const Instruction* next);

  bool BuildNewInstance(
      uint32_t vreg, dex::TypeIndex type_index, uint32_t dex_pc, const Instruction* next);

  bool BuildCheckCast(uint32_t vreg, dex::TypeIndex type_index, uint32_t dex_pc);

  bool LoadMethod(Register reg, ArtMethod* method);

  void DoReadBarrierOn(Register reg, vixl::aarch64::Label* exit = nullptr, bool do_mr_check = true);

  bool CanGenerateCodeFor(ArtField* field, bool can_receiver_be_null)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Mark whether dex register `vreg_index` is an object.
  void UpdateRegisterMask(uint32_t vreg_index, bool is_object) {
    // Note that the register mask is only useful when there is a frame, so we
    // use the callee save registers for the mask.
    if (is_object) {
      object_register_mask_ |= (1 << kAvailableCalleeSaveRegisters[vreg_index].GetCode());
    } else {
      object_register_mask_ &= ~(1 << kAvailableCalleeSaveRegisters[vreg_index].GetCode());
    }
  }

  // Mark whether dex register `vreg_index` can be null.
  void UpdateNonNullMask(uint32_t vreg_index, bool can_be_null) {
    if (can_be_null) {
      is_non_null_mask_ &= ~(1 << vreg_index);
    } else {
      is_non_null_mask_ |= (1 << vreg_index);
    }
  }

  // Update information about dex register `vreg_index`.
  void UpdateLocal(uint32_t vreg_index, bool is_object, bool can_be_null = true) {
    UpdateRegisterMask(vreg_index, is_object);
    UpdateNonNullMask(vreg_index, can_be_null);
  }

  // Whether dex register `vreg_index` can be null.
  bool CanBeNull(uint32_t vreg_index) const {
    return (is_non_null_mask_ & (1 << vreg_index)) == 0;
  }

  // Get the label associated with the given `dex_pc`.
  vixl::aarch64::Label* GetLabelOf(uint32_t dex_pc) {
    return &branch_targets_[dex_pc];
  }

  // If we need to abort compilation, clear branch targets, as required by vixl.
  void AbortCompilation() {
    for (vixl::aarch64::Label& label : branch_targets_) {
      if (label.IsLinked()) {
        __ Bind(&label);
      }
    }
  }

  // Compiler utilities.
  //
  Arm64Assembler* GetAssembler() { return &assembler_; }

  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }

  const DexFile& GetDexFile() const { return *dex_compilation_unit_.GetDexFile(); }

  const CodeItemDataAccessor& GetCodeItemAccessor() const {
    return dex_compilation_unit_.GetCodeItemAccessor();
  }

  bool HitUnimplemented() const {
    return unimplemented_reason_ != nullptr;
  }

  // Frame related utilities.
  //
  uint32_t GetCoreSpillSize() const {
    return GetFramePreservedCoreRegisters().GetTotalSizeInBytes();
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFramePreservedFPRegisters().GetTotalSizeInBytes() + GetCoreSpillSize();
  }

  CPURegList GetFramePreservedCoreRegisters() const {
    return CPURegList(CPURegister::kRegister, kXRegSize, core_spill_mask_);
  }

  CPURegList GetFramePreservedFPRegisters() const {
    return CPURegList(CPURegister::kVRegister, kDRegSize, fpu_spill_mask_);
  }

  // Method being compiled.
  ArtMethod* method_;

  // Allocator for any allocation happening in the compiler.
  ArenaAllocator* allocator_;

  VariableSizedHandleScope* handles_;

  // Compilation utilities.
  Arm64Assembler assembler_;
  JitPatchesARM64 jit_patches_;
  const CompilerOptions& compiler_options_;
  const DexCompilationUnit& dex_compilation_unit_;
  std::unique_ptr<CodeGenerationData> code_generation_data_;

  // The current location of each dex register.
  ArenaVector<Location> vreg_locations_;

  // Labels for dex pcs that are branch targets, indexed by dex pc (one entry per code unit).
  ArenaVector<vixl::aarch64::Label> branch_targets_;

  // For dex pcs that are branch targets, the register mask that will be used at
  // the point of that pc.
  ArenaVector<uint64_t> object_register_masks_;

  // For dex pcs that are branch targets, the mask for non-null objects that will
  // be used at the point of that pc.
  ArenaVector<uint64_t> is_non_null_masks_;

  // Whether we've created a frame for this compiled method.
  bool has_frame_;

  // CPU registers that have been spilled in the frame.
  uint32_t core_spill_mask_;

  // FPU registers that have been spilled in the frame.
  uint32_t fpu_spill_mask_;

  // The current mask to know which physical register holds an object.
  uint64_t object_register_mask_;

  // The current mask to know if a dex register is known non-null.
  uint64_t is_non_null_mask_;

  // The return type of the compiled method. Saved to avoid re-computing it on
  // the return instruction.
  DataType::Type return_type_;

  // The return type of the last invoke instruction.
  DataType::Type previous_invoke_return_type_;

  // If non-null, the reason the compilation could not be finished.
  const char* unimplemented_reason_ = nullptr;
};

bool FastCompilerARM64::InitializeParameters() {
  if (GetCodeItemAccessor().TriesSize() != 0) {
    // TODO: Support try/catch.
    unimplemented_reason_ = "TryCatch";
    return false;
  }
  const char* shorty = dex_compilation_unit_.GetShorty();
  uint16_t number_of_vregs = GetCodeItemAccessor().RegistersSize();
  uint16_t number_of_parameters = GetCodeItemAccessor().InsSize();
  uint16_t vreg_parameter_index = number_of_vregs - number_of_parameters;
  if (number_of_vregs > arraysize(kAvailableTempRegisters) ||
      number_of_vregs > arraysize(kAvailableCalleeSaveRegisters) ||
      number_of_vregs > arraysize(kAvailableTempFpuRegisters) ||
      number_of_vregs > arraysize(kAvailableCalleeSaveFpuRegisters)) {
    // Too many registers for this compiler.
    unimplemented_reason_ = "TooManyRegisters";
    return false;
  }
  InvokeDexCallingConventionVisitorARM64 convention;
  if (!dex_compilation_unit_.IsStatic()) {
    // Add the implicit 'this' argument, not expressed in the signature.
    vreg_locations_[vreg_parameter_index] = convention.GetNextLocation(DataType::Type::kReference);
    UpdateLocal(vreg_parameter_index, /* is_object= */ true, /* can_be_null= */ false);
    ++vreg_parameter_index;
    --number_of_parameters;
  }

  for (int i = 0, shorty_pos = 1;
       i < number_of_parameters;
       i++, shorty_pos++, vreg_parameter_index++) {
    DataType::Type type = DataType::FromShorty(shorty[shorty_pos]);
    vreg_locations_[vreg_parameter_index] = convention.GetNextLocation(type);
    UpdateLocal(vreg_parameter_index,
                /* is_object= */ (type == DataType::Type::kReference),
                /* can_be_null= */ true);
    if (DataType::Is64BitType(type)) {
      ++i;
      ++vreg_parameter_index;
    }
  }

  return_type_ = DataType::FromShorty(shorty[0]);
  return true;
}

void FastCompilerARM64::MoveConstantsToRegisters() {
  for (uint32_t i = 0; i < vreg_locations_.size(); ++i) {
    Location location = vreg_locations_[i];
    if (location.IsConstant()) {
      vreg_locations_[i] =
          CreateNewRegisterLocation(i, DataType::Type::kInt32, /* next= */ nullptr);
      MoveLocation(vreg_locations_[i], location, DataType::Type::kInt32);
      DCHECK(!HitUnimplemented());
    }
  }
}

void FastCompilerARM64::UpdateMasks(uint32_t dex_pc) {
  object_register_masks_[dex_pc] &= object_register_mask_;
  is_non_null_masks_[dex_pc] &= is_non_null_mask_;
}
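// Code generation is a single forward pass over the dex instructions. When a pc
// is a branch target (its label is already linked), constants are first
// materialized into registers and the object/non-null masks are merged, so that
// all incoming edges agree on register locations and masks when the label is bound.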
bool FastCompilerARM64::ProcessInstructions() {
  DCHECK(GetCodeItemAccessor().HasCodeItem());
  DexInstructionIterator it = GetCodeItemAccessor().begin();
  DexInstructionIterator end = GetCodeItemAccessor().end();
  DCHECK(it != end);
  do {
    DexInstructionPcPair pair = *it;
    ++it;
    // Fetch the next instruction as a micro-optimization currently only used
    // for optimizing returns.
    const Instruction* next = nullptr;
    if (it != end) {
      const DexInstructionPcPair& next_pair = *it;
      next = &next_pair.Inst();
      if (GetLabelOf(next_pair.DexPc())->IsLinked()) {
        // Disable the micro-optimization, as the next instruction is a branch
        // target.
        next = nullptr;
      }
    }
    vixl::aarch64::Label* label = GetLabelOf(pair.DexPc());
    if (label->IsLinked()) {
      // Emulate a branch to this pc.
      MoveConstantsToRegisters();
      UpdateMasks(pair.DexPc());
      // Set new masks based on all incoming edges.
      is_non_null_mask_ = is_non_null_masks_[pair.DexPc()];
      object_register_mask_ = object_register_masks_[pair.DexPc()];
      __ Bind(label);
    }
    if (!ProcessDexInstruction(pair.Inst(), pair.DexPc(), next)) {
      DCHECK(HitUnimplemented());
      return false;
    }
    // Note: There may be no Thread for gtests.
    DCHECK(Thread::Current() == nullptr || !Thread::Current()->IsExceptionPending())
        << GetDexFile().PrettyMethod(dex_compilation_unit_.GetDexMethodIndex())
        << " " << pair.Inst().Name() << "@" << pair.DexPc();
    DCHECK(!HitUnimplemented()) << GetUnimplementedReason();
  } while (it != end);
  return true;
}

bool FastCompilerARM64::MoveLocation(Location destination,
                                     Location source,
                                     DataType::Type dst_type) {
  if (source.Equals(destination)) {
    return true;
  }
  if (source.IsRegister() && destination.IsRegister()) {
    CPURegister dst = CPURegisterFrom(destination, dst_type);
    __ Mov(Register(dst), RegisterFrom(source, dst_type));
    return true;
  }
  if (source.IsConstant() && destination.IsRegister()) {
    if (source.GetConstant()->IsIntConstant()) {
      __ Mov(RegisterFrom(destination, DataType::Type::kInt32),
             source.GetConstant()->AsIntConstant()->GetValue());
      return true;
    }
  }
  unimplemented_reason_ = "MoveLocation";
  return false;
}

Location FastCompilerARM64::CreateNewRegisterLocation(uint32_t reg,
                                                      DataType::Type type,
                                                      const Instruction* next) {
  if (next != nullptr &&
      (next->Opcode() == Instruction::RETURN_OBJECT || next->Opcode() == Instruction::RETURN) &&
      (next->VRegA_11x() == reg)) {
    // If the next instruction is a return, use the return register from the calling
    // convention.
    InvokeDexCallingConventionVisitorARM64 convention;
    vreg_locations_[reg] = convention.GetReturnLocation(return_type_);
    return vreg_locations_[reg];
  } else if (vreg_locations_[reg].IsStackSlot() || vreg_locations_[reg].IsDoubleStackSlot()) {
    unimplemented_reason_ = "MoveStackSlot";
    // Return a phony location.
    return DataType::IsFloatingPointType(type)
        ? Location::FpuRegisterLocation(1)
        : Location::RegisterLocation(1);
  } else if (DataType::IsFloatingPointType(type)) {
    if (vreg_locations_[reg].IsFpuRegister()) {
      // Re-use existing register.
      return vreg_locations_[reg];
    } else if (has_frame_) {
      // TODO: Regenerate the method with floating point support.
      unimplemented_reason_ = "FpuRegisterAllocation";
      vreg_locations_[reg] = Location::FpuRegisterLocation(1);
      return vreg_locations_[reg];
    } else {
      vreg_locations_[reg] =
          Location::FpuRegisterLocation(kAvailableTempFpuRegisters[reg].GetCode());
      return vreg_locations_[reg];
    }
  } else if (vreg_locations_[reg].IsRegister()) {
    // Re-use existing register.
    return vreg_locations_[reg];
  } else {
    // Get the register associated with `reg`.
    uint32_t register_code = has_frame_
        ? kAvailableCalleeSaveRegisters[reg].GetCode()
        : kAvailableTempRegisters[reg].GetCode();
    vreg_locations_[reg] = Location::RegisterLocation(register_code);
    return vreg_locations_[reg];
  }
}

Location FastCompilerARM64::GetExistingRegisterLocation(uint32_t reg, DataType::Type type) {
  if (vreg_locations_[reg].IsStackSlot() || vreg_locations_[reg].IsDoubleStackSlot()) {
    unimplemented_reason_ = "MoveStackSlot";
    // Return a phony location.
    return DataType::IsFloatingPointType(type)
        ? Location::FpuRegisterLocation(1)
        : Location::RegisterLocation(1);
  } else if (DataType::IsFloatingPointType(type)) {
    if (vreg_locations_[reg].IsFpuRegister()) {
      return vreg_locations_[reg];
    } else {
      // TODO: Regenerate the method with floating point support.
unimplemented_reason_ = "FpuRegisterAllocation"; vreg_locations_[reg] = Location::FpuRegisterLocation(1); return vreg_locations_[reg]; } } else if (vreg_locations_[reg].IsRegister()) { return vreg_locations_[reg]; } else { unimplemented_reason_ = "UnknownLocation"; vreg_locations_[reg] = Location::RegisterLocation(1); return Location::RegisterLocation(1); } } void FastCompilerARM64::RecordPcInfo(uint32_t dex_pc) { DCHECK(has_frame_); uint32_t native_pc = GetAssembler()->CodePosition(); StackMapStream* stack_map_stream = code_generation_data_->GetStackMapStream(); CHECK_EQ(object_register_mask_ & callee_saved_core_registers.GetList(), object_register_mask_); stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, object_register_mask_); stack_map_stream->EndStackMapEntry(); } void FastCompilerARM64::PopFrameAndReturn() { if (has_frame_) { CodeGeneratorARM64::PopFrameAndReturn(GetAssembler(), GetFrameSize(), GetFramePreservedCoreRegisters(), GetFramePreservedFPRegisters()); } else { DCHECK_EQ(GetFrameSize(), 0); __ Ret(); } } bool FastCompilerARM64::EnsureHasFrame() { if (has_frame_) { // Frame entry has already been generated. return true; } has_frame_ = true; uint16_t number_of_vregs = GetCodeItemAccessor().RegistersSize(); for (int i = 0; i < number_of_vregs; ++i) { // Assume any vreg will be held in a callee-save register. core_spill_mask_ |= (1 << kAvailableCalleeSaveRegisters[i].GetCode()); if (vreg_locations_[i].IsFpuRegister()) { // TODO: Re-generate method with floating points. unimplemented_reason_ = "FloatingPoint"; return false; } } core_spill_mask_ |= (1 << lr.GetCode()); code_generation_data_->GetStackMapStream()->BeginMethod(GetFrameSize(), core_spill_mask_, fpu_spill_mask_, GetCodeItemAccessor().RegistersSize(), /* is_compiling_baseline= */ true, /* is_debuggable= */ false); MacroAssembler* masm = GetVIXLAssembler(); { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); __ Sub(temp, sp, static_cast(GetStackOverflowReservedBytes(InstructionSet::kArm64))); // Ensure that between load and RecordPcInfo there are no pools emitted. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); __ ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(0); } // Stack layout: // sp[frame_size - 8] : lr. // ... : other preserved core registers. // ... : other preserved fp registers. // ... : reserved frame space. // sp[0] : current method. int32_t frame_size = GetFrameSize(); uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); CPURegList preserved_core_registers = GetFramePreservedCoreRegisters(); DCHECK(!preserved_core_registers.IsEmpty()); uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); CPURegList preserved_fp_registers = GetFramePreservedFPRegisters(); // Save the current method if we need it, or if using STP reduces code // size. Note that we do not do this in HCurrentMethod, as the // instruction might have been removed in the SSA graph. CPURegister lowest_spill; if (core_spills_offset == kXRegSizeInBytes) { // If there is no gap between the method and the lowest core spill, use // aligned STP pre-index to store both. Max difference is 512. We do // that to reduce code size even if we do not have to save the method. DCHECK_LE(frame_size, 512); // 32 core registers are only 256 bytes. 
    lowest_spill = preserved_core_registers.PopLowestIndex();
    __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
  } else {
    __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
  }
  GetAssembler()->cfi().AdjustCFAOffset(frame_size);
  if (lowest_spill.IsValid()) {
    GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
    core_spills_offset += kXRegSizeInBytes;
  }
  GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
  GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);

  // Move registers which are currently allocated from caller-saves to callee-saves.
  for (int i = 0; i < number_of_vregs; ++i) {
    if (vreg_locations_[i].IsRegister()) {
      Location new_location =
          Location::RegisterLocation(kAvailableCalleeSaveRegisters[i].GetCode());
      if (!MoveLocation(new_location, vreg_locations_[i], DataType::Type::kInt64)) {
        return false;
      }
      vreg_locations_[i] = new_location;
    } else if (vreg_locations_[i].IsFpuRegister()) {
      Location new_location =
          Location::FpuRegisterLocation(kAvailableCalleeSaveFpuRegisters[i].GetCode());
      if (!MoveLocation(new_location, vreg_locations_[i], DataType::Type::kFloat64)) {
        return false;
      }
      vreg_locations_[i] = new_location;
    }
  }

  // Increment hotness. We use the ArtMethod's counter as we're not allocating a
  // `ProfilingInfo` object in the fast baseline compiler.
  if (!Runtime::Current()->IsAotCompiler()) {
    uint64_t address = reinterpret_cast64<uint64_t>(method_);
    UseScratchRegisterScope temps(masm);
    Register counter = temps.AcquireW();
    vixl::aarch64::Label increment, done;
    uint32_t entrypoint_offset =
        GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
    __ Ldrh(counter, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
    __ Cbnz(counter, &increment);
    __ Ldr(lr, MemOperand(tr, entrypoint_offset));
    // Note: we don't record the call here (and therefore don't generate a stack
    // map), as the entrypoint should never be suspended.
    __ Blr(lr);
    __ Bind(&increment);
    __ Add(counter, counter, -1);
    __ Strh(counter, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
    __ Bind(&done);
  }

  // Do the suspend check.
  if (compiler_options_.GetImplicitSuspendChecks()) {
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
    __ ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
    RecordPcInfo(0);
  } else {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireW();
    vixl::aarch64::Label continue_label;
    __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
    __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
    __ B(eq, &continue_label);
    uint32_t entrypoint_offset =
        GetThreadOffset<kArm64PointerSize>(kQuickTestSuspend).Int32Value();
    __ Ldr(lr, MemOperand(tr, entrypoint_offset));
    {
      ExactAssemblyScope eas(GetVIXLAssembler(),
                             kInstructionSize,
                             CodeBufferCheckScope::kExactSize);
      __ blr(lr);
      RecordPcInfo(0);
    }
    __ Bind(&continue_label);
  }
  return true;
}
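// Moves the dex registers used by an invoke into the locations required by the
// ARM64 managed calling convention, walking the shorty for the argument types.
// Signatures that do not match the given operands are rejected as "BogusSignature".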
bool FastCompilerARM64::SetupArguments(InvokeType invoke_type,
                                       const InstructionOperands& operands,
                                       const char* shorty,
                                       /* out */ uint32_t* obj_reg) {
  const size_t number_of_operands = operands.GetNumberOfOperands();
  size_t start_index = 0u;
  size_t argument_index = 0u;
  InvokeDexCallingConventionVisitorARM64 convention;

  // Handle 'this' parameter.
  if (invoke_type != kStatic) {
    if (number_of_operands == 0u) {
      unimplemented_reason_ = "BogusSignature";
      return false;
    }
    start_index = 1u;
    *obj_reg = operands.GetOperand(0u);
    if (!MoveLocation(convention.GetNextLocation(DataType::Type::kReference),
                      vreg_locations_[*obj_reg],
                      DataType::Type::kReference)) {
      return false;
    }
  }

  uint32_t shorty_index = 1;  // Skip the return type.
  // Handle all parameters except 'this'.
  for (size_t i = start_index; i < number_of_operands; ++i, ++argument_index, ++shorty_index) {
    // Make sure we don't go over the expected arguments or over the number of
    // dex registers given. If the instruction was seen as dead by the verifier,
    // it hasn't been properly checked.
    char c = shorty[shorty_index];
    if (UNLIKELY(c == 0)) {
      unimplemented_reason_ = "BogusSignature";
      return false;
    }
    DataType::Type type = DataType::FromShorty(c);
    bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64);
    if (is_wide &&
        ((i + 1 == number_of_operands) ||
         (operands.GetOperand(i) + 1 != operands.GetOperand(i + 1)))) {
      unimplemented_reason_ = "BogusSignature";
      return false;
    }
    if (!MoveLocation(convention.GetNextLocation(type),
                      vreg_locations_[operands.GetOperand(i)],
                      type)) {
      return false;
    }
    if (is_wide) {
      ++i;
    }
  }
  return true;
}

bool FastCompilerARM64::LoadMethod(Register reg, ArtMethod* method) {
  if (Runtime::Current()->IsAotCompiler()) {
    unimplemented_reason_ = "AOTLoadMethod";
    return false;
  }
  __ Ldr(reg, jit_patches_.DeduplicateUint64Literal(reinterpret_cast<uint64_t>(method)));
  return true;
}

bool FastCompilerARM64::HandleInvoke(const Instruction& instruction,
                                     uint32_t dex_pc,
                                     InvokeType invoke_type) {
  Instruction::Code opcode = instruction.Opcode();
  uint16_t method_index = (opcode >= Instruction::INVOKE_VIRTUAL_RANGE)
      ? instruction.VRegB_3rc()
      : instruction.VRegB_35c();
  ArtMethod* resolved_method = nullptr;
  size_t offset = 0u;
  {
    Thread* self = Thread::Current();
    ScopedObjectAccess soa(self);
    ClassLinker* const class_linker = dex_compilation_unit_.GetClassLinker();
    resolved_method = method_->SkipAccessChecks()
        ? class_linker->ResolveMethodId(method_index, method_)
        : class_linker->ResolveMethodWithChecks(method_index, method_, invoke_type);
    if (resolved_method == nullptr) {
      DCHECK(self->IsExceptionPending());
      self->ClearException();
      unimplemented_reason_ = "UnresolvedInvoke";
      return false;
    }
    if (resolved_method->IsConstructor() &&
        resolved_method->GetDeclaringClass()->IsObjectClass()) {
      // Object.<init> is always empty. Return early to not generate a frame.
      if (kIsDebugBuild) {
        CHECK(resolved_method->GetDeclaringClass()->IsVerified());
        CodeItemDataAccessor accessor(*resolved_method->GetDexFile(),
                                      resolved_method->GetCodeItem());
        CHECK_EQ(accessor.InsnsSizeInCodeUnits(), 1u);
        CHECK_EQ(accessor.begin().Inst().Opcode(), Instruction::RETURN_VOID);
      }
      // No need to update `previous_invoke_return_type_`, we know it is not going to
      // be used.
      return true;
    }
    if (invoke_type == kSuper) {
      resolved_method = method_->SkipAccessChecks()
          ? FindSuperMethodToCall</*access_check=*/ false>(
                method_index, resolved_method, method_, self)
          : FindSuperMethodToCall</*access_check=*/ true>(
                method_index, resolved_method, method_, self);
      if (resolved_method == nullptr) {
        DCHECK(self->IsExceptionPending()) << method_->PrettyMethod();
        self->ClearException();
        unimplemented_reason_ = "UnresolvedInvokeSuper";
        return false;
      }
    } else if (invoke_type == kVirtual) {
      offset = resolved_method->GetVtableIndex();
    } else if (invoke_type == kInterface) {
      offset = resolved_method->GetImtIndex();
    }
    if (resolved_method->IsStringConstructor()) {
      unimplemented_reason_ = "StringConstructor";
      return false;
    }
  }

  // Given we are calling a method, generate a frame.
  if (!EnsureHasFrame()) {
    return false;
  }

  // Setup the arguments for the call.
  uint32_t obj_reg = -1;
  const char* shorty = dex_compilation_unit_.GetDexFile()->GetMethodShorty(method_index);
  if (opcode >= Instruction::INVOKE_VIRTUAL_RANGE) {
    RangeInstructionOperands operands(instruction.VRegC(), instruction.VRegA_3rc());
    if (!SetupArguments(invoke_type, operands, shorty, &obj_reg)) {
      return false;
    }
  } else {
    uint32_t args[5];
    uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args);
    VarArgsInstructionOperands operands(args, number_of_vreg_arguments);
    if (!SetupArguments(invoke_type, operands, shorty, &obj_reg)) {
      return false;
    }
  }

  // Save the invoke return type for the next move-result instruction.
  previous_invoke_return_type_ = DataType::FromShorty(shorty[0]);

  if (invoke_type != kStatic) {
    bool can_be_null = CanBeNull(obj_reg);
    // Load the class of the instance. For kDirect and kSuper, this acts as a
    // null check.
    if (can_be_null || invoke_type == kVirtual || invoke_type == kInterface) {
      InvokeDexCallingConvention calling_convention;
      Register receiver = calling_convention.GetRegisterAt(0);
      Offset class_offset = mirror::Object::ClassOffset();
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      __ Ldr(kArtMethodRegister.W(), HeapOperand(receiver.W(), class_offset));
      if (can_be_null) {
        RecordPcInfo(dex_pc);
      }
    }
  }

  if (invoke_type == kVirtual) {
    size_t method_offset =
        mirror::Class::EmbeddedVTableEntryOffset(offset, kArm64PointerSize).SizeValue();
    __ Ldr(kArtMethodRegister, MemOperand(kArtMethodRegister, method_offset));
  } else if (invoke_type == kInterface) {
    __ Ldr(kArtMethodRegister,
           MemOperand(kArtMethodRegister,
                      mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
    uint32_t method_offset =
        static_cast<uint32_t>(ImTable::OffsetOfElement(offset, kArm64PointerSize));
    __ Ldr(kArtMethodRegister, MemOperand(kArtMethodRegister, method_offset));
    if (!LoadMethod(ip1, resolved_method)) {
      return false;
    }
  } else {
    DCHECK(invoke_type == kDirect || invoke_type == kSuper || invoke_type == kStatic);
    if (!LoadMethod(kArtMethodRegister, resolved_method)) {
      return false;
    }
  }

  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
  __ Ldr(lr, MemOperand(kArtMethodRegister, entry_point.SizeValue()));
  {
    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
    __ blr(lr);
    RecordPcInfo(dex_pc);
  }
  return true;
}

void FastCompilerARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc) {
  ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
  __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
  // Ensure the pc position is recorded immediately after the `blr` instruction.
  ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
  __ blr(lr);
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(dex_pc);
  }
}

bool FastCompilerARM64::BuildLoadString(uint32_t vreg,
                                        dex::StringIndex string_index,
                                        const Instruction* next) {
  // Generate a frame because of the read barrier.
  if (!EnsureHasFrame()) {
    return false;
  }
  Location loc = CreateNewRegisterLocation(vreg, DataType::Type::kReference, next);
  if (HitUnimplemented()) {
    return false;
  }
  if (Runtime::Current()->IsAotCompiler()) {
    unimplemented_reason_ = "AOTLoadString";
    return false;
  }
  ScopedObjectAccess soa(Thread::Current());
  ClassLinker* const class_linker = dex_compilation_unit_.GetClassLinker();
  ObjPtr<mirror::String> str = class_linker->ResolveString(string_index, method_);
  if (str == nullptr) {
    soa.Self()->ClearException();
    unimplemented_reason_ = "NullString";
    return false;
  }
  Handle<mirror::String> h_str = handles_->NewHandle(str);
  Register dst = RegisterFrom(loc, DataType::Type::kReference);
  __ Ldr(dst.W(),
         jit_patches_.DeduplicateJitStringLiteral(GetDexFile(),
                                                  string_index,
                                                  h_str,
                                                  code_generation_data_.get()));
  __ Ldr(dst.W(), MemOperand(dst.X()));
  DoReadBarrierOn(dst);
  UpdateLocal(vreg, /* is_object= */ true, /* can_be_null= */ false);
  return true;
}

bool FastCompilerARM64::BuildNewInstance(uint32_t vreg,
                                         dex::TypeIndex type_index,
                                         uint32_t dex_pc,
                                         const Instruction* next) {
  if (!EnsureHasFrame()) {
    return false;
  }
  if (Runtime::Current()->IsAotCompiler()) {
    unimplemented_reason_ = "AOTNewInstance";
    return false;
  }
  ScopedObjectAccess soa(Thread::Current());
  ObjPtr<mirror::Class> klass = dex_compilation_unit_.GetClassLinker()->ResolveType(
      type_index, dex_compilation_unit_.GetDexCache(), dex_compilation_unit_.GetClassLoader());
  if (klass == nullptr ||
      !method_->GetDeclaringClass()->CanAccess(klass) ||
      klass->IsStringClass()) {
    soa.Self()->ClearException();
    unimplemented_reason_ = "UnsupportedClassForNewInstance";
    return false;
  }
  InvokeRuntimeCallingConvention calling_convention;
  Register cls_reg = calling_convention.GetRegisterAt(0);
  Handle<mirror::Class> h_klass = handles_->NewHandle(klass);
  __ Ldr(cls_reg.W(),
         jit_patches_.DeduplicateJitClassLiteral(GetDexFile(),
                                                 type_index,
                                                 h_klass,
                                                 code_generation_data_.get()));
  __ Ldr(cls_reg.W(), MemOperand(cls_reg.X()));
  DoReadBarrierOn(cls_reg);
  QuickEntrypointEnum entrypoint = kQuickAllocObjectInitialized;
  if (h_klass->IsFinalizable() ||
      !h_klass->IsVisiblyInitialized() ||
      h_klass->IsClassClass() ||  // Classes cannot be allocated in code.
      !klass->IsInstantiable()) {
    entrypoint = kQuickAllocObjectWithChecks;
  }
  InvokeRuntime(entrypoint, dex_pc);
  if (!MoveLocation(CreateNewRegisterLocation(vreg, DataType::Type::kReference, next),
                    calling_convention.GetReturnLocation(DataType::Type::kReference),
                    DataType::Type::kReference)) {
    return false;
  }
  if (HitUnimplemented()) {
    return false;
  }
  UpdateLocal(vreg, /* is_object= */ true, /* can_be_null= */ false);
  return true;
}

bool FastCompilerARM64::BuildCheckCast(uint32_t vreg, dex::TypeIndex type_index, uint32_t dex_pc) {
  if (!EnsureHasFrame()) {
    return false;
  }
  InvokeRuntimeCallingConvention calling_convention;
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register cls = calling_convention.GetRegisterAt(1);
  Register obj_cls = calling_convention.GetRegisterAt(2);
  Register obj = WRegisterFrom(GetExistingRegisterLocation(vreg, DataType::Type::kReference));
  if (HitUnimplemented()) {
    return false;
  }
  ScopedObjectAccess soa(Thread::Current());
  ObjPtr<mirror::Class> klass = dex_compilation_unit_.GetClassLinker()->ResolveType(
      type_index,
      dex_compilation_unit_.GetDexCache(),
      dex_compilation_unit_.GetClassLoader());
  if (klass == nullptr || !method_->GetDeclaringClass()->CanAccess(klass)) {
    soa.Self()->ClearException();
    unimplemented_reason_ = "UnsupportedCheckCast";
    return false;
  }
  Handle<mirror::Class> h_klass = handles_->NewHandle(klass);
  vixl::aarch64::Label exit, read_barrier_exit;
  __ Cbz(obj, &exit);
  __ Ldr(cls.W(),
         jit_patches_.DeduplicateJitClassLiteral(GetDexFile(),
                                                 type_index,
                                                 h_klass,
                                                 code_generation_data_.get()));
  __ Ldr(cls.W(), MemOperand(cls.X()));
  __ Ldr(obj_cls.W(), MemOperand(obj.X()));
  __ Cmp(cls.W(), obj_cls.W());
  __ B(eq, &exit);
  // Read barrier on the GC root.
  DoReadBarrierOn(cls, &read_barrier_exit);
  // Read barrier on the object's class.
  DoReadBarrierOn(obj_cls, &read_barrier_exit, /* do_mr_check= */ false);
  __ Bind(&read_barrier_exit);
  __ Cmp(cls.W(), obj_cls.W());
  __ B(eq, &exit);
  if (!MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
                    LocationFrom(obj),
                    DataType::Type::kReference)) {
    return false;
  }
  InvokeRuntime(kQuickCheckInstanceOf, dex_pc);
  __ Bind(&exit);
  return true;
}

void FastCompilerARM64::DoReadBarrierOn(Register reg,
                                        vixl::aarch64::Label* exit,
                                        bool do_mr_check) {
  DCHECK(has_frame_);
  vixl::aarch64::Label local_exit;
  if (do_mr_check) {
    __ Cbz(mr, (exit != nullptr) ? exit : &local_exit);
  }
  int32_t entry_point_offset =
      Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(reg.GetCode());
  __ Ldr(lr, MemOperand(tr, entry_point_offset));
  __ Blr(lr);
  if (exit == nullptr && do_mr_check) {
    __ Bind(&local_exit);
  }
}

bool FastCompilerARM64::CanGenerateCodeFor(ArtField* field, bool can_receiver_be_null) {
  if (field == nullptr) {
    // Clear potential resolution exception.
    Thread::Current()->ClearException();
    unimplemented_reason_ = "UnresolvedField";
    return false;
  }
  if (field->IsVolatile()) {
    unimplemented_reason_ = "VolatileField";
    return false;
  }
  if (can_receiver_be_null) {
    if (!CanDoImplicitNullCheckOn(field->GetOffset().Uint32Value())) {
      unimplemented_reason_ = "TooLargeFieldOffset";
      return false;
    }
  }
  return true;
}

#define DO_CASE(arm_op, op, other)   \
  case arm_op: {                     \
    if (constant op other) {         \
      __ B(label);                   \
    }                                \
    return true;                     \
  }
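// Generates code for the dex IF_* instructions. `kCond` is the ARM64 condition
// to branch on; `kCompareWithZero` selects between the 21t format (IF_*Z,
// compare against zero) and the 22t format (compare two registers). Only
// forward branches are supported, since code is generated in a single pass.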
template <vixl::aarch64::Condition kCond, bool kCompareWithZero>
bool FastCompilerARM64::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
  DCHECK_EQ(kCompareWithZero ? Instruction::Format::k21t : Instruction::Format::k22t,
            Instruction::FormatOf(instruction.Opcode()));
  if (!EnsureHasFrame()) {
    return false;
  }
  int32_t target_offset = kCompareWithZero ? instruction.VRegB_21t() : instruction.VRegC_22t();
  DCHECK_EQ(target_offset, instruction.GetTargetOffset());
  if (target_offset < 0) {
    // TODO: Support for negative branches requires two passes.
    unimplemented_reason_ = "NegativeBranch";
    return false;
  }
  int32_t register_index = kCompareWithZero ? instruction.VRegA_21t() : instruction.VRegA_22t();
  vixl::aarch64::Label* label = GetLabelOf(dex_pc + target_offset);
  Location location = vreg_locations_[register_index];

  if (kCompareWithZero) {
    // We are going to branch, move all constants to registers to make the merge
    // point use the same locations.
    MoveConstantsToRegisters();
    UpdateMasks(dex_pc + target_offset);
    if (location.IsConstant()) {
      DCHECK(location.GetConstant()->IsIntConstant());
      int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
      switch (kCond) {
        DO_CASE(vixl::aarch64::eq, ==, 0);
        DO_CASE(vixl::aarch64::ne, !=, 0);
        DO_CASE(vixl::aarch64::lt, <, 0);
        DO_CASE(vixl::aarch64::le, <=, 0);
        DO_CASE(vixl::aarch64::gt, >, 0);
        DO_CASE(vixl::aarch64::ge, >=, 0);
      }
      return true;
    } else if (location.IsRegister()) {
      CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
      switch (kCond) {
        case vixl::aarch64::eq: {
          __ Cbz(Register(reg), label);
          return true;
        }
        case vixl::aarch64::ne: {
          __ Cbnz(Register(reg), label);
          return true;
        }
        default: {
          __ Cmp(Register(reg), 0);
          __ B(kCond, label);
          return true;
        }
      }
    } else {
      DCHECK(location.IsStackSlot());
      unimplemented_reason_ = "CompareWithZeroOnStackSlot";
    }
    return false;
  }

  // !kCompareWithZero
  Location other_location = vreg_locations_[instruction.VRegB_22t()];
  // We are going to branch, move all constants to registers to make the merge
  // point use the same locations.
  MoveConstantsToRegisters();
  UpdateMasks(dex_pc + target_offset);
  if (location.IsConstant() && other_location.IsConstant()) {
    int32_t constant = location.GetConstant()->AsIntConstant()->GetValue();
    int32_t other_constant = other_location.GetConstant()->AsIntConstant()->GetValue();
    switch (kCond) {
      DO_CASE(vixl::aarch64::eq, ==, other_constant);
      DO_CASE(vixl::aarch64::ne, !=, other_constant);
      DO_CASE(vixl::aarch64::lt, <, other_constant);
      DO_CASE(vixl::aarch64::le, <=, other_constant);
      DO_CASE(vixl::aarch64::gt, >, other_constant);
      DO_CASE(vixl::aarch64::ge, >=, other_constant);
    }
    return true;
  }
  // Reload the locations, which can now be registers.
  location = vreg_locations_[register_index];
  other_location = vreg_locations_[instruction.VRegB_22t()];
  if (location.IsRegister() && other_location.IsRegister()) {
    CPURegister reg = CPURegisterFrom(location, DataType::Type::kInt32);
    CPURegister other_reg = CPURegisterFrom(other_location, DataType::Type::kInt32);
    __ Cmp(Register(reg), Register(other_reg));
    __ B(kCond, label);
    return true;
  }
  unimplemented_reason_ = "UnimplementedCompare";
  return false;
}

#undef DO_CASE

bool FastCompilerARM64::ProcessDexInstruction(const Instruction& instruction,
                                              uint32_t dex_pc,
                                              const Instruction* next) {
  bool is_object = false;
  switch (instruction.Opcode()) {
    case Instruction::CONST_4: {
      int32_t register_index = instruction.VRegA_11n();
      int32_t constant = instruction.VRegB_11n();
      vreg_locations_[register_index] =
          Location::ConstantLocation(new (allocator_) HIntConstant(constant));
      UpdateLocal(register_index, /* is_object= */ false);
      return true;
    }
    case Instruction::CONST_16: {
      int32_t register_index = instruction.VRegA_21s();
      int32_t constant = instruction.VRegB_21s();
      vreg_locations_[register_index] =
          Location::ConstantLocation(new (allocator_) HIntConstant(constant));
      UpdateLocal(register_index, /* is_object= */ false);
      return true;
    }
    case Instruction::CONST: { break; }
    case Instruction::CONST_HIGH16: { break; }
    case Instruction::CONST_WIDE_16: { break; }
    case Instruction::CONST_WIDE_32: { break; }
    case Instruction::CONST_WIDE: { break; }
    case Instruction::CONST_WIDE_HIGH16: { break; }
    case Instruction::MOVE:
    case Instruction::MOVE_FROM16:
    case Instruction::MOVE_16: { break; }
    case Instruction::MOVE_WIDE:
    case Instruction::MOVE_WIDE_FROM16:
    case Instruction::MOVE_WIDE_16: { break; }
    case Instruction::MOVE_OBJECT:
    case Instruction::MOVE_OBJECT_16:
    case Instruction::MOVE_OBJECT_FROM16: { break; }
    case Instruction::RETURN_VOID: {
      if (method_->IsConstructor() &&
          !method_->IsStatic() &&
          dex_compilation_unit_.RequiresConstructorBarrier()) {
        __ Dmb(InnerShareable, BarrierWrites);
      }
      PopFrameAndReturn();
      return true;
    }

#define IF_XX(comparison, cond)                                                           \
    case Instruction::IF_##cond:                                                          \
      return If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc);   \
    case Instruction::IF_##cond##Z:                                                       \
      return If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc);

    IF_XX(vixl::aarch64::eq, EQ);
    IF_XX(vixl::aarch64::ne, NE);
    IF_XX(vixl::aarch64::lt, LT);
    IF_XX(vixl::aarch64::le, LE);
    IF_XX(vixl::aarch64::gt, GT);
    IF_XX(vixl::aarch64::ge, GE);

    case Instruction::GOTO:
    case Instruction::GOTO_16:
    case Instruction::GOTO_32: { break; }
    case Instruction::RETURN:
    case Instruction::RETURN_OBJECT: {
      int32_t register_index = instruction.VRegA_11x();
      InvokeDexCallingConventionVisitorARM64 convention;
      if (!MoveLocation(convention.GetReturnLocation(return_type_),
                        vreg_locations_[register_index],
                        return_type_)) {
        return false;
      }
      if (has_frame_) {
        // We may have used the "record last instruction before return in return
        // register" optimization (see `CreateNewRegisterLocation`),
        // so set the returned register back to a callee save location in case the
        // method has a frame and there are instructions after this return that
        // may use this register.
        uint32_t register_code = kAvailableCalleeSaveRegisters[register_index].GetCode();
        vreg_locations_[register_index] = Location::RegisterLocation(register_code);
      }
      PopFrameAndReturn();
      return true;
    }
    case Instruction::RETURN_WIDE: { break; }
    case Instruction::INVOKE_DIRECT:
    case Instruction::INVOKE_DIRECT_RANGE:
      return HandleInvoke(instruction, dex_pc, kDirect);
    case Instruction::INVOKE_INTERFACE:
    case Instruction::INVOKE_INTERFACE_RANGE:
      return HandleInvoke(instruction, dex_pc, kInterface);
    case Instruction::INVOKE_STATIC:
    case Instruction::INVOKE_STATIC_RANGE:
      return HandleInvoke(instruction, dex_pc, kStatic);
    case Instruction::INVOKE_SUPER:
    case Instruction::INVOKE_SUPER_RANGE:
      return HandleInvoke(instruction, dex_pc, kSuper);
    case Instruction::INVOKE_VIRTUAL:
    case Instruction::INVOKE_VIRTUAL_RANGE: {
      return HandleInvoke(instruction, dex_pc, kVirtual);
    }
    case Instruction::INVOKE_POLYMORPHIC: { break; }
    case Instruction::INVOKE_POLYMORPHIC_RANGE: { break; }
    case Instruction::INVOKE_CUSTOM: { break; }
    case Instruction::INVOKE_CUSTOM_RANGE: { break; }
    case Instruction::NEG_INT: { break; }
    case Instruction::NEG_LONG: { break; }
    case Instruction::NEG_FLOAT: { break; }
    case Instruction::NEG_DOUBLE: { break; }
    case Instruction::NOT_INT: { break; }
    case Instruction::NOT_LONG: { break; }
    case Instruction::INT_TO_LONG: { break; }
    case Instruction::INT_TO_FLOAT: { break; }
    case Instruction::INT_TO_DOUBLE: { break; }
    case Instruction::LONG_TO_INT: { break; }
    case Instruction::LONG_TO_FLOAT: { break; }
    case Instruction::LONG_TO_DOUBLE: { break; }
    case Instruction::FLOAT_TO_INT: { break; }
    case Instruction::FLOAT_TO_LONG: { break; }
    case Instruction::FLOAT_TO_DOUBLE: { break; }
    case Instruction::DOUBLE_TO_INT: { break; }
    case Instruction::DOUBLE_TO_LONG: { break; }
    case Instruction::DOUBLE_TO_FLOAT: { break; }
    case Instruction::INT_TO_BYTE: { break; }
    case Instruction::INT_TO_SHORT: { break; }
    case Instruction::INT_TO_CHAR: { break; }
    case Instruction::ADD_INT: { break; }
    case Instruction::ADD_LONG: { break; }
    case Instruction::ADD_DOUBLE: { break; }
    case Instruction::ADD_FLOAT: { break; }
    case Instruction::SUB_INT: { break; }
    case Instruction::SUB_LONG: { break; }
    case Instruction::SUB_FLOAT: {
      break;
    }
    case Instruction::SUB_DOUBLE: { break; }
    case Instruction::ADD_INT_2ADDR: { break; }
    case Instruction::MUL_INT: { break; }
    case Instruction::MUL_LONG: { break; }
    case Instruction::MUL_FLOAT: { break; }
    case Instruction::MUL_DOUBLE: { break; }
    case Instruction::DIV_INT: { break; }
    case Instruction::DIV_LONG: { break; }
    case Instruction::DIV_FLOAT: { break; }
    case Instruction::DIV_DOUBLE: { break; }
    case Instruction::REM_INT: { break; }
    case Instruction::REM_LONG: { break; }
    case Instruction::REM_FLOAT: { break; }
    case Instruction::REM_DOUBLE: { break; }
    case Instruction::AND_INT: { break; }
    case Instruction::AND_LONG: { break; }
    case Instruction::SHL_INT: { break; }
    case Instruction::SHL_LONG: { break; }
    case Instruction::SHR_INT: { break; }
    case Instruction::SHR_LONG: { break; }
    case Instruction::USHR_INT: { break; }
    case Instruction::USHR_LONG: { break; }
    case Instruction::OR_INT: { break; }
    case Instruction::OR_LONG: { break; }
    case Instruction::XOR_INT: { break; }
    case Instruction::XOR_LONG: { break; }
    case Instruction::ADD_LONG_2ADDR: { break; }
    case Instruction::ADD_DOUBLE_2ADDR: { break; }
    case Instruction::ADD_FLOAT_2ADDR: { break; }
    case Instruction::SUB_INT_2ADDR: { break; }
    case Instruction::SUB_LONG_2ADDR: { break; }
    case Instruction::SUB_FLOAT_2ADDR: { break; }
    case Instruction::SUB_DOUBLE_2ADDR: { break; }
    case Instruction::MUL_INT_2ADDR: { break; }
    case Instruction::MUL_LONG_2ADDR: { break; }
    case Instruction::MUL_FLOAT_2ADDR: { break; }
    case Instruction::MUL_DOUBLE_2ADDR: { break; }
    case Instruction::DIV_INT_2ADDR: { break; }
    case Instruction::DIV_LONG_2ADDR: { break; }
    case Instruction::REM_INT_2ADDR: { break; }
    case Instruction::REM_LONG_2ADDR: { break; }
    case Instruction::REM_FLOAT_2ADDR: { break; }
    case Instruction::REM_DOUBLE_2ADDR: { break; }
    case Instruction::SHL_INT_2ADDR: { break; }
    case Instruction::SHL_LONG_2ADDR: { break; }
    case Instruction::SHR_INT_2ADDR: { break; }
    case Instruction::SHR_LONG_2ADDR: { break; }
    case Instruction::USHR_INT_2ADDR: { break; }
    case Instruction::USHR_LONG_2ADDR: { break; }
    case Instruction::DIV_FLOAT_2ADDR: { break; }
    case Instruction::DIV_DOUBLE_2ADDR: { break; }
    case Instruction::AND_INT_2ADDR: { break; }
    case Instruction::AND_LONG_2ADDR: { break; }
    case Instruction::OR_INT_2ADDR: { break; }
    case Instruction::OR_LONG_2ADDR: { break; }
    case Instruction::XOR_INT_2ADDR: { break; }
    case Instruction::XOR_LONG_2ADDR: { break; }
    case Instruction::ADD_INT_LIT16: { break; }
    case Instruction::AND_INT_LIT16: { break; }
    case Instruction::OR_INT_LIT16: { break; }
    case Instruction::XOR_INT_LIT16: { break; }
    case Instruction::RSUB_INT: { break; }
    case Instruction::MUL_INT_LIT16: { break; }
    case Instruction::ADD_INT_LIT8: { break; }
    case Instruction::AND_INT_LIT8: { break; }
    case Instruction::OR_INT_LIT8: { break; }
    case Instruction::XOR_INT_LIT8: { break; }
    case Instruction::RSUB_INT_LIT8: { break; }
    case Instruction::MUL_INT_LIT8: { break; }
    case Instruction::DIV_INT_LIT16:
    case Instruction::DIV_INT_LIT8: { break; }
    case Instruction::REM_INT_LIT16:
    case Instruction::REM_INT_LIT8: { break; }
    case Instruction::SHL_INT_LIT8: { break; }
    case Instruction::SHR_INT_LIT8: { break; }
    case Instruction::USHR_INT_LIT8: { break; }
    case Instruction::NEW_INSTANCE: {
      dex::TypeIndex type_index(instruction.VRegB_21c());
      return BuildNewInstance(instruction.VRegA_21c(), type_index, dex_pc, next);
    }
    case Instruction::NEW_ARRAY: { break; }
    case Instruction::FILLED_NEW_ARRAY: { break; }
    case Instruction::FILLED_NEW_ARRAY_RANGE: { break; }
    case Instruction::FILL_ARRAY_DATA: { break; }
    case Instruction::MOVE_RESULT_OBJECT:
      is_object = true;
      FALLTHROUGH_INTENDED;
    case Instruction::MOVE_RESULT: {
      int32_t register_index = instruction.VRegA_11x();
      InvokeDexCallingConventionVisitorARM64 convention;
      if (!MoveLocation(
              CreateNewRegisterLocation(register_index, previous_invoke_return_type_, next),
              convention.GetReturnLocation(previous_invoke_return_type_),
              previous_invoke_return_type_)) {
        return false;
      }
      if (HitUnimplemented()) {
        return false;
      }
      UpdateLocal(register_index, is_object);
      return true;
    }
    case Instruction::MOVE_RESULT_WIDE: { break; }
    case Instruction::CMP_LONG: { break; }
    case Instruction::CMPG_FLOAT: { break; }
    case Instruction::CMPG_DOUBLE: { break; }
    case Instruction::CMPL_FLOAT: { break; }
    case Instruction::CMPL_DOUBLE: { break; }
    case Instruction::NOP:
      return true;
    case Instruction::IGET_OBJECT:
    case Instruction::IGET:
    case Instruction::IGET_WIDE:
    case Instruction::IGET_BOOLEAN:
    case Instruction::IGET_BYTE:
    case Instruction::IGET_CHAR:
    case Instruction::IGET_SHORT: {
      uint32_t source_or_dest_reg = instruction.VRegA_22c();
      uint32_t obj_reg = instruction.VRegB_22c();
      uint16_t field_index = instruction.VRegC_22c();
      bool can_receiver_be_null = CanBeNull(obj_reg);
      ArtField* field = nullptr;
      {
        ScopedObjectAccess soa(Thread::Current());
        field = ResolveFieldWithAccessChecks(soa.Self(),
                                             dex_compilation_unit_.GetClassLinker(),
                                             field_index,
                                             method_,
                                             /* is_static= */ false,
                                             /* is_put= */ false,
                                             /* resolve_field_type= */ 0u);
        if (!CanGenerateCodeFor(field, can_receiver_be_null)) {
          return false;
        }
      }
      if (can_receiver_be_null) {
        // We need a frame in case the null check throws.
        if (!EnsureHasFrame()) {
          return false;
        }
      }
      MemOperand mem = HeapOperand(
          RegisterFrom(GetExistingRegisterLocation(obj_reg, DataType::Type::kReference),
                       DataType::Type::kReference),
          field->GetOffset());
      if (HitUnimplemented()) {
        return false;
      }
      if (instruction.Opcode() == Instruction::IGET_OBJECT) {
        // Generate a frame because of the read barrier.
        if (!EnsureHasFrame()) {
          return false;
        }
        Register dst = WRegisterFrom(
            CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kReference, next));
        if (HitUnimplemented()) {
          return false;
        }
        {
          // Ensure the pc position is recorded immediately after the load instruction.
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          __ Ldr(dst, mem);
          if (can_receiver_be_null) {
            RecordPcInfo(dex_pc);
          }
        }
        UpdateLocal(source_or_dest_reg, /* is_object= */ true);
        DoReadBarrierOn(dst);
        return true;
      }
      // Ensure the pc position is recorded immediately after the load instruction.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      switch (instruction.Opcode()) {
        case Instruction::IGET_BOOLEAN: {
          Register dst = WRegisterFrom(
              CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kInt32, next));
          __ Ldrb(Register(dst), mem);
          break;
        }
        case Instruction::IGET_BYTE: {
          Register dst = WRegisterFrom(
              CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kInt32, next));
          __ Ldrsb(Register(dst), mem);
          break;
        }
        case Instruction::IGET_CHAR: {
          Register dst = WRegisterFrom(
              CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kInt32, next));
          __ Ldrh(Register(dst), mem);
          break;
        }
        case Instruction::IGET_SHORT: {
          Register dst = WRegisterFrom(
              CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kInt32, next));
          __ Ldrsh(Register(dst), mem);
          break;
        }
        case Instruction::IGET: {
          const dex::FieldId& field_id = GetDexFile().GetFieldId(field_index);
          const char* type = GetDexFile().GetFieldTypeDescriptor(field_id);
          DataType::Type field_type = DataType::FromShorty(type[0]);
          if (DataType::IsFloatingPointType(field_type)) {
            VRegister dst = SRegisterFrom(
                CreateNewRegisterLocation(source_or_dest_reg, field_type, next));
            __ Ldr(dst, mem);
          } else {
            Register dst = WRegisterFrom(
                CreateNewRegisterLocation(source_or_dest_reg, DataType::Type::kInt32, next));
            __ Ldr(dst, mem);
          }
          if (HitUnimplemented()) {
            return false;
          }
          break;
        }
        default:
          unimplemented_reason_ = "UnimplementedIGet";
          return false;
      }
      UpdateLocal(source_or_dest_reg, /* is_object= */ false);
      if (can_receiver_be_null) {
        RecordPcInfo(dex_pc);
      }
      return true;
    }
    case Instruction::IPUT_OBJECT:
      is_object = true;
      FALLTHROUGH_INTENDED;
    case Instruction::IPUT:
    case Instruction::IPUT_WIDE:
    case Instruction::IPUT_BOOLEAN:
    case Instruction::IPUT_BYTE:
    case Instruction::IPUT_CHAR:
    case Instruction::IPUT_SHORT: {
      uint32_t source_reg = instruction.VRegA_22c();
      uint32_t obj_reg = instruction.VRegB_22c();
      uint16_t field_index = instruction.VRegC_22c();
      bool can_receiver_be_null = CanBeNull(obj_reg);
      ArtField* field = nullptr;
      {
        ScopedObjectAccess soa(Thread::Current());
        field = ResolveFieldWithAccessChecks(soa.Self(),
                                             dex_compilation_unit_.GetClassLinker(),
                                             field_index,
                                             method_,
                                             /* is_static= */ false,
                                             /* is_put= */ true,
                                             /* resolve_field_type= */ is_object);
        if (!CanGenerateCodeFor(field, can_receiver_be_null)) {
          return false;
        }
      }
      if (can_receiver_be_null) {
        // We need a frame in case the null check throws.
        if (!EnsureHasFrame()) {
          return false;
        }
      }
      Register holder = RegisterFrom(
          GetExistingRegisterLocation(obj_reg, DataType::Type::kReference),
          DataType::Type::kReference);
      if (HitUnimplemented()) {
        return false;
      }
      MemOperand mem = HeapOperand(holder, field->GetOffset());
      // Need one temp if the stored value is a constant.
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Location src = vreg_locations_[source_reg];
      bool assigning_constant = false;
      if (src.IsConstant()) {
        assigning_constant = true;
        if (src.GetConstant()->IsIntConstant() &&
            src.GetConstant()->AsIntConstant()->GetValue() == 0) {
          src = Location::RegisterLocation(XZR);
        } else {
          src = Location::RegisterLocation(temps.AcquireW().GetCode());
          if (!MoveLocation(src, vreg_locations_[source_reg], DataType::Type::kInt32)) {
            return false;
          }
        }
      } else if (src.IsStackSlot() || src.IsDoubleStackSlot()) {
        unimplemented_reason_ = "IPUTOnStackSlot";
        return false;
      }
      if (instruction.Opcode() == Instruction::IPUT_OBJECT) {
        Register reg = WRegisterFrom(src);
        {
          // Ensure the pc position is recorded immediately after the store instruction.
          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
          __ Str(reg, mem);
          if (can_receiver_be_null) {
            RecordPcInfo(dex_pc);
          }
        }
        // If we assign a constant (only null for iput-object), no need for the write
        // barrier.
        if (!assigning_constant) {
          vixl::aarch64::Label exit;
          __ Cbz(reg, &exit);
          Register card = temps.AcquireX();
          Register temp = temps.AcquireW();
          __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
          __ Lsr(temp, holder, gc::accounting::CardTable::kCardShift);
          __ Strb(card, MemOperand(card, temp.X()));
          __ Bind(&exit);
        }
        return true;
      }
      // Ensure the pc position is recorded immediately after the store instruction.
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      switch (instruction.Opcode()) {
        case Instruction::IPUT_BOOLEAN:
        case Instruction::IPUT_BYTE: {
          __ Strb(WRegisterFrom(src), mem);
          break;
        }
        case Instruction::IPUT_CHAR:
        case Instruction::IPUT_SHORT: {
          __ Strh(WRegisterFrom(src), mem);
          break;
        }
        case Instruction::IPUT: {
          if (src.IsFpuRegister()) {
            __ Str(SRegisterFrom(src), mem);
          } else {
            __ Str(WRegisterFrom(src), mem);
          }
          break;
        }
        default:
          unimplemented_reason_ = "UnimplementedIPut";
          return false;
      }
      if (can_receiver_be_null) {
        RecordPcInfo(dex_pc);
      }
      return true;
    }
    case Instruction::SGET:
    case Instruction::SGET_WIDE:
    case Instruction::SGET_OBJECT:
    case Instruction::SGET_BOOLEAN:
    case Instruction::SGET_BYTE:
    case Instruction::SGET_CHAR:
    case Instruction::SGET_SHORT: { break; }
    case Instruction::SPUT:
    case Instruction::SPUT_WIDE:
    case Instruction::SPUT_OBJECT:
    case Instruction::SPUT_BOOLEAN:
    case Instruction::SPUT_BYTE:
    case Instruction::SPUT_CHAR:
    case Instruction::SPUT_SHORT: { break; }

#define ARRAY_XX(kind, anticipated_type)   \
    case Instruction::AGET##kind: {        \
      break;                               \
    }                                      \
    case Instruction::APUT##kind: {        \
      break;                               \
    }

    ARRAY_XX(, DataType::Type::kInt32);
    ARRAY_XX(_WIDE, DataType::Type::kInt64);
    ARRAY_XX(_OBJECT, DataType::Type::kReference);
    ARRAY_XX(_BOOLEAN, DataType::Type::kBool);
    ARRAY_XX(_BYTE, DataType::Type::kInt8);
    ARRAY_XX(_CHAR, DataType::Type::kUint16);
    ARRAY_XX(_SHORT, DataType::Type::kInt16);

    case Instruction::ARRAY_LENGTH: { break; }
    case Instruction::CONST_STRING: {
      dex::StringIndex string_index(instruction.VRegB_21c());
      return BuildLoadString(instruction.VRegA_21c(), string_index, next);
    }
    case Instruction::CONST_STRING_JUMBO: {
      dex::StringIndex string_index(instruction.VRegB_31c());
      return BuildLoadString(instruction.VRegA_31c(), string_index, next);
    }
    case Instruction::CONST_CLASS: { break; }
    case Instruction::CONST_METHOD_HANDLE: { break; }
    case Instruction::CONST_METHOD_TYPE: { break; }
    case Instruction::MOVE_EXCEPTION: { break; }
    case Instruction::THROW: {
      if (!EnsureHasFrame()) {
        return false;
      }
      int32_t reg = instruction.VRegA_11x();
      InvokeRuntimeCallingConvention calling_convention;
      if (!MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
                        vreg_locations_[reg],
                        DataType::Type::kReference)) {
        return false;
      }
      InvokeRuntime(kQuickDeliverException, dex_pc);
      return true;
    }
    case Instruction::INSTANCE_OF: { break; }
    case Instruction::CHECK_CAST: {
      uint8_t reference = instruction.VRegA_21c();
      dex::TypeIndex type_index(instruction.VRegB_21c());
      return BuildCheckCast(reference, type_index, dex_pc);
    }
    case Instruction::MONITOR_ENTER: { break; }
    case Instruction::MONITOR_EXIT: { break; }
    case Instruction::SPARSE_SWITCH:
    case Instruction::PACKED_SWITCH: { break; }
    case Instruction::UNUSED_3E ...
         Instruction::UNUSED_43:
    case Instruction::UNUSED_73:
    case Instruction::UNUSED_79:
    case Instruction::UNUSED_7A:
    case Instruction::UNUSED_E3 ... Instruction::UNUSED_F9: {
      break;
    }
  }
  unimplemented_reason_ = instruction.Name();
  return false;
}  // NOLINT(readability/fn_size)

bool FastCompilerARM64::Compile() {
  if (!InitializeParameters()) {
    DCHECK(HitUnimplemented());
    AbortCompilation();
    return false;
  }
  if (!ProcessInstructions()) {
    DCHECK(HitUnimplemented());
    AbortCompilation();
    return false;
  }
  DCHECK(!HitUnimplemented()) << GetUnimplementedReason();

  if (!has_frame_) {
    code_generation_data_->GetStackMapStream()->BeginMethod(/* frame_size= */ 0u,
                                                            /* core_spill_mask= */ 0u,
                                                            /* fp_spill_mask= */ 0u,
                                                            GetCodeItemAccessor().RegistersSize(),
                                                            /* is_compiling_baseline= */ true,
                                                            /* is_debuggable= */ false);
  }
  code_generation_data_->GetStackMapStream()->EndMethod(assembler_.CodeSize());
  assembler_.FinalizeCode();

  if (VLOG_IS_ON(jit)) {
    // Dump the generated code.
    {
      ScopedObjectAccess soa(Thread::Current());
      VLOG(jit) << "Dumping generated fast baseline code for " << method_->PrettyMethod();
    }
    FILE* file = tmpfile();
    MacroAssembler* masm = GetVIXLAssembler();
    PrintDisassembler print_disasm(file);
    vixl::aarch64::Instruction* dis_start =
        masm->GetBuffer()->GetStartAddress<vixl::aarch64::Instruction*>();
    vixl::aarch64::Instruction* dis_end =
        masm->GetBuffer()->GetEndAddress<vixl::aarch64::Instruction*>();
    print_disasm.DisassembleBuffer(dis_start, dis_end);
    fseek(file, 0L, SEEK_SET);
    char buffer[1024];
    const char* line;
    while ((line = fgets(buffer, sizeof(buffer), file)) != nullptr) {
      VLOG(jit) << std::string(line);
    }
    fclose(file);
  }
  return true;
}

}  // namespace arm64

std::unique_ptr<FastCompiler> FastCompiler::CompileARM64(
    ArtMethod* method,
    ArenaAllocator* allocator,
    ArenaStack* arena_stack,
    VariableSizedHandleScope* handles,
    const CompilerOptions& compiler_options,
    const DexCompilationUnit& dex_compilation_unit) {
  if (!compiler_options.GetImplicitNullChecks() ||
      !compiler_options.GetImplicitStackOverflowChecks() ||
      kUseTableLookupReadBarrier ||
      !kReserveMarkingRegister ||
      kPoisonHeapReferences) {
    // Configurations we don't support.
    return nullptr;
  }
  std::unique_ptr<arm64::FastCompilerARM64> compiler(new arm64::FastCompilerARM64(
      method, allocator, arena_stack, handles, compiler_options, dex_compilation_unit));
  if (compiler->Compile()) {
    return compiler;
  }
  VLOG(jit) << "Did not fast compile because of " << compiler->GetUnimplementedReason();
  return nullptr;
}

}  // namespace art