Diffstat (limited to 'compiler/optimizing')
78 files changed, 4874 insertions, 30376 deletions
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index a5f78cafe0..e1f061ae70 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -398,6 +398,48 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { } } +void HBasicBlockBuilder::InsertSynthesizedLoopsForOsr() { + ArenaSet<uint32_t> targets(allocator_->Adapter(kArenaAllocGraphBuilder)); + // Collect basic blocks that are targets of a negative branch. + for (const DexInstructionPcPair& pair : code_item_accessor_) { + const uint32_t dex_pc = pair.DexPc(); + const Instruction& instruction = pair.Inst(); + if (instruction.IsBranch()) { + uint32_t target_dex_pc = dex_pc + instruction.GetTargetOffset(); + if (target_dex_pc < dex_pc) { + HBasicBlock* block = GetBlockAt(target_dex_pc); + CHECK_NE(kNoDexPc, block->GetDexPc()); + targets.insert(block->GetBlockId()); + } + } else if (instruction.IsSwitch()) { + DexSwitchTable table(instruction, dex_pc); + for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) { + uint32_t target_dex_pc = dex_pc + s_it.CurrentTargetOffset(); + if (target_dex_pc < dex_pc) { + HBasicBlock* block = GetBlockAt(target_dex_pc); + CHECK_NE(kNoDexPc, block->GetDexPc()); + targets.insert(block->GetBlockId()); + } + } + } + } + + // Insert synthesized loops before the collected blocks. + for (uint32_t block_id : targets) { + HBasicBlock* block = graph_->GetBlocks()[block_id]; + HBasicBlock* loop_block = new (allocator_) HBasicBlock(graph_, block->GetDexPc()); + graph_->AddBlock(loop_block); + while (!block->GetPredecessors().empty()) { + block->GetPredecessors()[0]->ReplaceSuccessor(block, loop_block); + } + loop_block->AddSuccessor(loop_block); + loop_block->AddSuccessor(block); + // We loop on false - we know this won't be optimized later on as the loop + // is marked irreducible, which disables loop optimizations. + loop_block->AddInstruction(new (allocator_) HIf(graph_->GetIntConstant(0), kNoDexPc)); + } +} + bool HBasicBlockBuilder::Build() { DCHECK(code_item_accessor_.HasCodeItem()); DCHECK(graph_->GetBlocks().empty()); @@ -413,6 +455,10 @@ bool HBasicBlockBuilder::Build() { ConnectBasicBlocks(); InsertTryBoundaryBlocks(); + if (graph_->IsCompilingOsr()) { + InsertSynthesizedLoopsForOsr(); + } + return true; } diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 2c1f034d80..42a3f327e7 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -59,6 +59,11 @@ class HBasicBlockBuilder : public ValueObject { void ConnectBasicBlocks(); void InsertTryBoundaryBlocks(); + // To ensure branches with negative offsets can always OSR jump to compiled + // code, we insert synthesized loops before each block that is the target of a + // negative branch. + void InsertSynthesizedLoopsForOsr(); + // Helper method which decides whether `catch_block` may have live normal // predecessors and thus whether a synthetic catch block needs to be created // to avoid mixing normal and exceptional predecessors. 
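For readers skimming the hunk above: here is a minimal standalone sketch of the CFG rewrite that InsertSynthesizedLoopsForOsr performs, using a toy Block/Graph type (the names below are illustrative, not ART's HBasicBlock/HGraph API). Every block that is the target of a negative (backward) branch gets a new predecessor block that branches on a constant-false condition, with one successor edge back to itself and one to the original block; the self-edge makes the synthesized block an irreducible loop header, which keeps loop optimizations away from it and gives OSR a guaranteed entry point.

#include <algorithm>
#include <memory>
#include <vector>

// Toy CFG types, for illustration only.
struct Block {
  int id = 0;
  bool loops_on_false = false;      // Stands in for the HIf(graph->GetIntConstant(0)) instruction.
  std::vector<Block*> predecessors;
  std::vector<Block*> successors;   // successors[0] = taken edge, successors[1] = fall-through.
};

struct Graph {
  std::vector<std::unique_ptr<Block>> blocks;
  Block* NewBlock() {
    blocks.push_back(std::make_unique<Block>());
    blocks.back()->id = static_cast<int>(blocks.size()) - 1;
    return blocks.back().get();
  }
};

// Mirrors the shape of the transformation for a single backward-branch target.
void InsertSynthesizedLoop(Graph* graph, Block* target) {
  Block* loop_block = graph->NewBlock();
  // Redirect every predecessor of `target` to the synthesized block.
  for (Block* pred : target->predecessors) {
    std::replace(pred->successors.begin(), pred->successors.end(), target, loop_block);
    loop_block->predecessors.push_back(pred);
  }
  target->predecessors.clear();
  // "Loop on false": the taken edge goes back to the synthesized block itself,
  // while the fall-through edge continues to the original target, so execution
  // always proceeds to the target at runtime.
  loop_block->loops_on_false = true;
  loop_block->successors = {loop_block, target};
  loop_block->predecessors.push_back(loop_block);
  target->predecessors.push_back(loop_block);
}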
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 2bbb570c8d..cfd9ea6333 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -32,14 +32,6 @@ #include "code_generator_x86_64.h" #endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "code_generator_mips.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "code_generator_mips64.h" -#endif - #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" #include "base/casts.h" @@ -64,6 +56,7 @@ #include "ssa_liveness_analysis.h" #include "stack_map.h" #include "stack_map_stream.h" +#include "string_builder_append.h" #include "thread-current-inl.h" #include "utils/assembler.h" @@ -394,7 +387,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_, core_spill_mask_, fpu_spill_mask_, - GetGraph()->GetNumberOfVRegs()); + GetGraph()->GetNumberOfVRegs(), + GetGraph()->IsCompilingBaseline()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -599,6 +593,57 @@ void CodeGenerator::GenerateInvokeCustomCall(HInvokeCustom* invoke) { InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } +void CodeGenerator::CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, + Location out) { + ArenaAllocator* allocator = GetGraph()->GetAllocator(); + LocationSummary* locations = + new (allocator) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + locations->SetOut(out); + instruction->GetLocations()->SetInAt(instruction->FormatIndex(), + Location::ConstantLocation(instruction->GetFormat())); + + uint32_t format = static_cast<uint32_t>(instruction->GetFormat()->GetValue()); + uint32_t f = format; + PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet()); + size_t stack_offset = static_cast<size_t>(pointer_size); // Start after the ArtMethod*. + for (size_t i = 0, num_args = instruction->GetNumberOfArguments(); i != num_args; ++i) { + StringBuilderAppend::Argument arg_type = + static_cast<StringBuilderAppend::Argument>(f & StringBuilderAppend::kArgMask); + switch (arg_type) { + case StringBuilderAppend::Argument::kStringBuilder: + case StringBuilderAppend::Argument::kString: + case StringBuilderAppend::Argument::kCharArray: + static_assert(sizeof(StackReference<mirror::Object>) == sizeof(uint32_t), "Size check."); + FALLTHROUGH_INTENDED; + case StringBuilderAppend::Argument::kBoolean: + case StringBuilderAppend::Argument::kChar: + case StringBuilderAppend::Argument::kInt: + case StringBuilderAppend::Argument::kFloat: + locations->SetInAt(i, Location::StackSlot(stack_offset)); + break; + case StringBuilderAppend::Argument::kLong: + case StringBuilderAppend::Argument::kDouble: + stack_offset = RoundUp(stack_offset, sizeof(uint64_t)); + locations->SetInAt(i, Location::DoubleStackSlot(stack_offset)); + // Skip the low word, let the common code skip the high word. 
+ stack_offset += sizeof(uint32_t); + break; + default: + LOG(FATAL) << "Unexpected arg format: 0x" << std::hex + << (f & StringBuilderAppend::kArgMask) << " full format: 0x" << format; + UNREACHABLE(); + } + f >>= StringBuilderAppend::kBitsPerArg; + stack_offset += sizeof(uint32_t); + } + DCHECK_EQ(f, 0u); + + size_t param_size = stack_offset - static_cast<size_t>(pointer_size); + DCHECK_ALIGNED(param_size, kVRegSize); + size_t num_vregs = param_size / kVRegSize; + graph_->UpdateMaximumNumberOfOutVRegs(num_vregs); +} + void CodeGenerator::CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -897,18 +942,6 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: { - return std::unique_ptr<CodeGenerator>( - new (allocator) mips::CodeGeneratorMIPS(graph, compiler_options, stats)); - } -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case InstructionSet::kMips64: { - return std::unique_ptr<CodeGenerator>( - new (allocator) mips64::CodeGeneratorMIPS64(graph, compiler_options, stats)); - } -#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( @@ -958,6 +991,20 @@ CodeGenerator::CodeGenerator(HGraph* graph, is_leaf_(true), requires_current_method_(false), code_generation_data_() { + if (GetGraph()->IsCompilingOsr()) { + // Make OSR methods have all registers spilled; this simplifies the logic of + // jumping to the compiled code directly. + for (size_t i = 0; i < number_of_core_registers_; ++i) { + if (IsCoreCalleeSaveRegister(i)) { + AddAllocatedRegister(Location::RegisterLocation(i)); + } + } + for (size_t i = 0; i < number_of_fpu_registers_; ++i) { + if (IsFloatingPointCalleeSaveRegister(i)) { + AddAllocatedRegister(Location::FpuRegisterLocation(i)); + } + } + } } CodeGenerator::~CodeGenerator() {} @@ -1036,10 +1083,42 @@ ScopedArenaVector<uint8_t> CodeGenerator::BuildStackMaps(const dex::CodeItem* co return stack_map; } +// Returns whether stackmap dex register info is needed for the instruction. +// +// The following cases mandate having a dex register map: +// * Deoptimization +// when we need to obtain the values to restore the actual vregisters for the interpreter. +// * Debuggability +// when we want to observe the values / asynchronously deoptimize. +// * Monitor operations +// to allow dumping the locked dex registers in a stack trace for non-debuggable code. +// * On-stack-replacement (OSR) +// when entering OSR-compiled code from the interpreter, we need to initialize the compiled +// code values with the values from the vregisters. +// * Method local catch blocks +// a catch block must see the environment of the instruction from the same method that can +// throw into this block.
+static bool NeedsVregInfo(HInstruction* instruction, bool osr) { + HGraph* graph = instruction->GetBlock()->GetGraph(); + return instruction->IsDeoptimize() || + graph->IsDebuggable() || + graph->HasMonitorOperations() || + osr || + instruction->CanThrowIntoCatchBlock(); +} + void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path, bool native_debug_info) { + RecordPcInfo(instruction, dex_pc, GetAssembler()->CodePosition(), slow_path, native_debug_info); +} + +void CodeGenerator::RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + uint32_t native_pc, + SlowPathCode* slow_path, + bool native_debug_info) { if (instruction != nullptr) { // The code generated for some type conversions // may call the runtime, thus normally requiring a subsequent @@ -1063,9 +1142,6 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } } - // Collect PC infos for the mapping table. - uint32_t native_pc = GetAssembler()->CodePosition(); - StackMapStream* stack_map_stream = GetStackMapStream(); if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register @@ -1114,12 +1190,15 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, StackMap::Kind kind = native_debug_info ? StackMap::Kind::Debug : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default); + bool needs_vreg_info = NeedsVregInfo(instruction, osr); stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, locations->GetStackMask(), - kind); - EmitEnvironment(environment, slow_path); + kind, + needs_vreg_info); + + EmitEnvironment(environment, slow_path, needs_vreg_info); stack_map_stream->EndStackMapEntry(); if (osr) { @@ -1232,19 +1311,8 @@ void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) { code_generation_data_->AddSlowPath(slow_path); } -void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) { - if (environment == nullptr) return; - +void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) { StackMapStream* stack_map_stream = GetStackMapStream(); - if (environment->GetParent() != nullptr) { - // We emit the parent environment first. - EmitEnvironment(environment->GetParent(), slow_path); - stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(), - environment->GetDexPc(), - environment->Size(), - &graph_->GetDexFile()); - } - // Walk over the environment, and record the location of dex registers. for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* current = environment->GetInstructionAt(i); @@ -1389,8 +1457,31 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } +} + +void CodeGenerator::EmitEnvironment(HEnvironment* environment, + SlowPathCode* slow_path, + bool needs_vreg_info) { + if (environment == nullptr) return; + + StackMapStream* stack_map_stream = GetStackMapStream(); + bool emit_inline_info = environment->GetParent() != nullptr; + + if (emit_inline_info) { + // We emit the parent environment first. + EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info); + stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(), + environment->GetDexPc(), + needs_vreg_info ? environment->Size() : 0, + &graph_->GetDexFile()); + } + + if (needs_vreg_info) { + // If a dex register map is not required we just won't emit it. 
+ EmitVRegInfo(environment, slow_path); + } - if (environment->GetParent() != nullptr) { + if (emit_inline_info) { stack_map_stream->EndInlineInfoEntry(); } } @@ -1402,7 +1493,7 @@ bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { HNullCheck* null_check = instr->GetImplicitNullCheck(); if (null_check != nullptr) { - RecordPcInfo(null_check, null_check->GetDexPc()); + RecordPcInfo(null_check, null_check->GetDexPc(), GetAssembler()->CodePosition()); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f70ecb612d..9e3e454f3d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -222,7 +222,19 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual Assembler* GetAssembler() = 0; virtual const Assembler& GetAssembler() const = 0; virtual size_t GetWordSize() const = 0; - virtual size_t GetFloatingPointSpillSlotSize() const = 0; + + // Get FP register width in bytes for spilling/restoring in the slow paths. + // + // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers + // alias and live SIMD registers are forced to be spilled in full size in the slow paths. + virtual size_t GetSlowPathFPWidth() const { + // Default implementation. + return GetCalleePreservedFPWidth(); + } + + // Get FP register width required to be preserved by the target ABI. + virtual size_t GetCalleePreservedFPWidth() const = 0; + virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_safepoint_spill_size, @@ -319,20 +331,36 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; } - // Record native to dex mapping for a suspend point. Required by runtime. + // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, + uint32_t native_pc, SlowPathCode* slow_path = nullptr, bool native_debug_info = false); + + // Record native to dex mapping for a suspend point. + // The native_pc is obtained from Assembler::CodePosition. + // + // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc + // for the instruction. If the exact native_pc is required, it must be provided explicitly. + void RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr, + bool native_debug_info = false); + + // Check whether we have already recorded mapping at this PC. bool HasStackMapAtCurrentPc(); + // Record extra stack maps if we support native debugging. + // + // ARM-specific behaviour: The recorded native PC might be a branch over pools to instructions + // corresponding to the dex PC.
void MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); bool CanMoveNullCheckToUser(HNullCheck* null_check); - void MaybeRecordImplicitNullCheck(HInstruction* instruction); + virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction); LocationSummary* CreateThrowingSlowPathLocations( HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); void GenerateNullCheck(HNullCheck* null_check); @@ -546,6 +574,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void GenerateInvokeCustomCall(HInvokeCustom* invoke); + void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out); + void CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -673,7 +703,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } uint32_t GetFpuSpillSize() const { - return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize(); + return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth(); } uint32_t GetCoreSpillSize() const { @@ -759,7 +789,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); void BlockIfInRegister(Location location, bool is_out = false) const; - void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path); + void EmitEnvironment(HEnvironment* environment, + SlowPathCode* slow_path, + bool needs_vreg_info = true); + void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path); OptimizingCompilerStats* stats_; @@ -788,6 +821,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { std::unique_ptr<CodeGenerationData> code_generation_data_; friend class OptimizingCFITest; + ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); + ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3086882678..7d1b0ea6dd 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -18,7 +18,7 @@ #include "arch/arm64/asm_support_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" -#include "art_method.h" +#include "art_method-inl.h" #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" #include "class_table.h" @@ -224,12 +224,13 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar stack_offset += kXRegSizeInBytes; } + const size_t fp_reg_size = codegen->GetGraph()->HasSIMD() ? 
kQRegSizeInBytes : kDRegSizeInBytes; const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_fpu_stack_offsets_[i] = stack_offset; - stack_offset += kDRegSizeInBytes; + stack_offset += fp_reg_size; } SaveRestoreLiveRegistersHelper(codegen, @@ -887,18 +888,19 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator(), compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint64_literals_(std::less<uint64_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint32_literals_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -1059,17 +1061,68 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) { codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } -void CodeGeneratorARM64::GenerateFrameEntry() { +void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { MacroAssembler* masm = GetVIXLAssembler(); - __ Bind(&frame_entry_label_); - if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireX(); - __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp, temp, 1); - __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + Register counter = temps.AcquireX(); + Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX(); + if (!is_frame_entry) { + __ Ldr(method, MemOperand(sp, 0)); + } + __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(counter, counter, 1); + // Subtract one if the counter would overflow. 
+ __ Sub(counter, counter, Operand(counter, LSR, 16)); + __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + uint64_t address = reinterpret_cast64<uint64_t>(info); + vixl::aarch64::Label done; + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + Register counter = temps.AcquireW(); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Add(counter, counter, 1); + __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Tst(counter, 0xffff); + __ B(ne, &done); + if (is_frame_entry) { + if (HasEmptyFrame()) { + // The entrypoint expects the method at the bottom of the stack. We + // claim stack space necessary for alignment. + __ Claim(kStackAlignment); + __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0)); + } else if (!RequiresCurrentMethod()) { + __ Str(kArtMethodRegister, MemOperand(sp, 0)); + } + } else { + CHECK(RequiresCurrentMethod()); + } + uint32_t entrypoint_offset = + GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value(); + __ Ldr(lr, MemOperand(tr, entrypoint_offset)); + // Note: we don't record the call here (and therefore don't generate a stack + // map), as the entrypoint should never be suspended. + __ Blr(lr); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + __ Ldr(lr, MemOperand(sp, 8)); + __ Drop(kStackAlignment); + } + __ Bind(&done); + } } +} + +void CodeGeneratorARM64::GenerateFrameEntry() { + MacroAssembler* masm = GetVIXLAssembler(); + __ Bind(&frame_entry_label_); bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); @@ -1089,27 +1142,42 @@ void CodeGeneratorARM64::GenerateFrameEntry() { } if (!HasEmptyFrame()) { - int frame_size = GetFrameSize(); // Stack layout: // sp[frame_size - 8] : lr. // ... : other preserved core registers. // ... : other preserved fp registers. // ... : reserved frame space. // sp[0] : current method. - - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { + int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize()); + uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); + CPURegList preserved_core_registers = GetFramePreservedCoreRegisters(); + DCHECK(!preserved_core_registers.IsEmpty()); + uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); + CPURegList preserved_fp_registers = GetFramePreservedFPRegisters(); + + // Save the current method if we need it, or if using STP reduces code + // size. Note that we do not do this in HCurrentMethod, as the + // instruction might have been removed in the SSA graph. + CPURegister lowest_spill; + if (core_spills_offset == kXRegSizeInBytes) { + // If there is no gap between the method and the lowest core spill, use + // aligned STP pre-index to store both. Max difference is 512. We do + // that to reduce code size even if we do not have to save the method. + DCHECK_LE(frame_size, 512); // 32 core registers are only 256 bytes.
+ lowest_spill = preserved_core_registers.PopLowestIndex(); + __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex)); + } else if (RequiresCurrentMethod()) { __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); } else { __ Claim(frame_size); } GetAssembler()->cfi().AdjustCFAOffset(frame_size); - GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), - frame_size - GetCoreSpillSize()); - GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), - frame_size - FrameEntrySpillSize()); + if (lowest_spill.IsValid()) { + GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset); + core_spills_offset += kXRegSizeInBytes; + } + GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset); + GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset); if (GetGraph()->HasShouldDeoptimizeFlag()) { // Initialize should_deoptimize flag to 0. @@ -1117,19 +1185,37 @@ void CodeGeneratorARM64::GenerateFrameEntry() { __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); } } - + MaybeIncrementHotness(/* is_frame_entry= */ true); MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().RememberState(); if (!HasEmptyFrame()) { - int frame_size = GetFrameSize(); - GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), - frame_size - FrameEntrySpillSize()); - GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), - frame_size - GetCoreSpillSize()); - __ Drop(frame_size); + int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize()); + uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); + CPURegList preserved_core_registers = GetFramePreservedCoreRegisters(); + DCHECK(!preserved_core_registers.IsEmpty()); + uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); + CPURegList preserved_fp_registers = GetFramePreservedFPRegisters(); + + CPURegister lowest_spill; + if (core_spills_offset == kXRegSizeInBytes) { + // If there is no gap between the method and the lowest core spill, use + // aligned LDP pre-index to pop both. Max difference is 504. We do + // that to reduce code size even though the loaded method is unused. + DCHECK_LE(frame_size, 504); // 32 core registers are only 256 bytes. 
+ lowest_spill = preserved_core_registers.PopLowestIndex(); + core_spills_offset += kXRegSizeInBytes; + } + GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset); + GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset); + if (lowest_spill.IsValid()) { + __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex)); + GetAssembler()->cfi().Restore(DWARFReg(lowest_spill)); + } else { + __ Drop(frame_size); + } GetAssembler()->cfi().AdjustCFAOffset(-frame_size); } __ Ret(); @@ -1146,7 +1232,7 @@ CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const { CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const { DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_, GetNumberOfFloatingPointRegisters())); - return CPURegList(CPURegister::kFPRegister, kDRegSize, + return CPURegList(CPURegister::kVRegister, kDRegSize, fpu_spill_mask_); } @@ -1245,16 +1331,18 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_ return kArm64WordSize; } -size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - FPRegister reg = FPRegister(reg_id, kDRegSize); - __ Str(reg, MemOperand(sp, stack_index)); - return kArm64WordSize; +size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " + << "use SaveRestoreLiveRegistersHelper"; + UNREACHABLE(); } -size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - FPRegister reg = FPRegister(reg_id, kDRegSize); - __ Ldr(reg, MemOperand(sp, stack_index)); - return kArm64WordSize; +size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " + << "use SaveRestoreLiveRegistersHelper"; + UNREACHABLE(); } void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -1277,10 +1365,10 @@ void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* consta } else if (constant->IsNullConstant()) { __ Mov(Register(destination), 0); } else if (constant->IsFloatConstant()) { - __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); + __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue()); } else { DCHECK(constant->IsDoubleConstant()); - __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue()); + __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue()); } } @@ -1304,7 +1392,7 @@ static bool CoherentConstantAndType(Location constant, DataType::Type type) { static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, vixl::aarch64::UseScratchRegisterScope* temps, int size_in_bits) { - return masm->GetScratchFPRegisterList()->IsEmpty() + return masm->GetScratchVRegisterList()->IsEmpty() ? 
CPURegister(temps->AcquireRegisterOfSize(size_in_bits)) : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits)); } @@ -1372,7 +1460,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, if (GetGraph()->HasSIMD()) { __ Mov(QRegisterFrom(destination), QRegisterFrom(source)); } else { - __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type)); + __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type)); } } } @@ -1382,14 +1470,14 @@ void CodeGeneratorARM64::MoveLocation(Location destination, } else { DCHECK(source.IsSIMDStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) { + if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) { Register temp = temps.AcquireX(); __ Ldr(temp, MemOperand(sp, source.GetStackIndex())); __ Str(temp, MemOperand(sp, destination.GetStackIndex())); __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize)); __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize)); } else { - FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); + VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); __ Ldr(temp, StackOperandFrom(source)); __ Str(temp, StackOperandFrom(destination)); } @@ -1563,7 +1651,7 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, MaybeRecordImplicitNullCheck(instruction); } } - __ Fmov(FPRegister(dst), temp); + __ Fmov(VRegister(dst), temp); break; } case DataType::Type::kUint32: @@ -1663,7 +1751,7 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } else { DCHECK(src.IsFPRegister()); temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); - __ Fmov(temp_src, FPRegister(src)); + __ Fmov(temp_src, VRegister(src)); } { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); @@ -1687,14 +1775,25 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); - __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value())); - { + ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint); + // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the + // entire oat file. This adds an extra branch and we do not want to slow down the main path. + // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative. + if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) { + __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value())); // Ensure the pc position is recorded immediately after the `blr` instruction. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); __ blr(lr); if (EntrypointRequiresStackMap(entrypoint)) { RecordPcInfo(instruction, dex_pc, slow_path); } + } else { + // Ensure the pc position is recorded immediately after the `bl` instruction. 
+ ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); + EmitEntrypointThunkCall(entrypoint_offset); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } } @@ -1713,14 +1812,16 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); const size_t status_byte_offset = mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); - - // Even if the initialized flag is set, we need to ensure consistent memory ordering. - // TODO(vixl): Let the MacroAssembler handle MemOperand. - __ Add(temp, class_reg, status_byte_offset); - __ Ldarb(temp, HeapOperand(temp)); - __ Cmp(temp, shifted_initialized_value); + constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); + + // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize + // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code + // size, load only the high byte of the field and compare with 0xf0. + // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks + // show that this pattern is slower (tested on little cores). + __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset)); + __ Cmp(temp, shifted_visibly_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -2005,9 +2106,9 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { } case DataType::Type::kFloat32: case DataType::Type::kFloat64: { - FPRegister dst = OutputFPRegister(instr); - FPRegister lhs = InputFPRegisterAt(instr, 0); - FPRegister rhs = InputFPRegisterAt(instr, 1); + VRegister dst = OutputFPRegister(instr); + VRegister lhs = InputFPRegisterAt(instr, 0); + VRegister rhs = InputFPRegisterAt(instr, 1); if (instr->IsAdd()) { __ Fadd(dst, lhs, rhs); } else if (instr->IsSub()) { @@ -2497,12 +2598,10 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { DataType::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (IsConstantZeroBitPattern(instruction->InputAt(2))) { @@ -2517,7 +2616,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { DataType::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -2530,7 +2629,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { MacroAssembler* masm = GetVIXLAssembler(); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call_for_type_check); + DCHECK(!needs_type_check); if (index.IsConstant()) { offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); @@ -2562,123 +2661,105 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } } else { DCHECK(!instruction->GetArray()->IsIntermediateAddress()); - vixl::aarch64::Label done; + + bool can_value_be_null = instruction->GetValueCanBeNull(); + vixl::aarch64::Label do_store; + if (can_value_be_null) { + __ Cbz(Register(value), &do_store); + } + SlowPathCodeARM64* slow_path = nullptr; - { - // We use a block to end the scratch scope before the write barrier, thus - // freeing the temporary registers so they can be used in `MarkGCCard`. + if (needs_type_check) { + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); + codegen_->AddSlowPath(slow_path); + + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); - if (index.IsConstant()) { - offset += Int64FromLocation(index) << DataType::SizeShift(value_type); - destination = HeapOperand(array, offset); - } else { - destination = HeapOperand(temp, - XRegisterFrom(index), - LSL, - DataType::SizeShift(value_type)); - } - - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + Register temp2 = temps.AcquireSameSizeAs(array); - if (may_need_runtime_call_for_type_check) { - slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); - codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - vixl::aarch64::Label non_zero; - __ Cbnz(Register(value), &non_zero); - if (!index.IsConstant()) { - __ Add(temp, array, offset); - } - { - // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools - // emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - __ Str(wzr, destination); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - __ B(&done); - __ Bind(&non_zero); - } + // Note that when Baker read barriers are enabled, the type + // checks are performed without read barriers. 
This is fine, + // even in the case where a class object is in the from-space + // after the flip, as a comparison involving such a type would + // not produce a false positive; it may of course produce a + // false negative, in which case we would take the ArraySet + // slow path. - // Note that when Baker read barriers are enabled, the type - // checks are performed without read barriers. This is fine, - // even in the case where a class object is in the from-space - // after the flip, as a comparison involving such a type would - // not produce a false positive; it may of course produce a - // false negative, in which case we would take the ArraySet - // slow path. + // /* HeapReference<Class> */ temp = array->klass_ + { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(temp); - Register temp2 = temps.AcquireSameSizeAs(array); - // /* HeapReference<Class> */ temp = array->klass_ - { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::aarch64::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp); - // /* HeapReference<Class> */ temp = temp->component_type_ - __ Ldr(temp, HeapOperand(temp, component_offset)); - // /* HeapReference<Class> */ temp2 = value->klass_ - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // If heap poisoning is enabled, no need to unpoison `temp` - // nor `temp2`, as we are comparing two poisoned references. - __ Cmp(temp, temp2); - temps.Release(temp2); - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::aarch64::Label do_put; - __ B(eq, &do_put); - // If heap poisoning is enabled, the `temp` reference has - // not been unpoisoned yet; unpoison it now. - GetAssembler()->MaybeUnpoisonHeapReference(temp); - - // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - // If heap poisoning is enabled, no need to unpoison - // `temp`, as we are comparing against null below. - __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ B(ne, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. 
+ __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); } + } - if (kPoisonHeapReferences) { - Register temp2 = temps.AcquireSameSizeAs(array); - DCHECK(value.IsW()); - __ Mov(temp2, value.W()); - GetAssembler()->PoisonHeapReference(temp2); - source = temp2; - } + codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false); - if (!index.IsConstant()) { - __ Add(temp, array, offset); - } else { - // We no longer need the `temp` here so release it as the store below may - // need a scratch register (if the constant index makes the offset too large) - // and the poisoned `source` could be using the other scratch register. - temps.Release(temp); - } - { - // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - __ Str(source, destination); + if (can_value_be_null) { + DCHECK(do_store.IsLinked()); + __ Bind(&do_store); + } - if (!may_need_runtime_call_for_type_check) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - } + UseScratchRegisterScope temps(masm); + if (kPoisonHeapReferences) { + Register temp_source = temps.AcquireSameSizeAs(array); + DCHECK(value.IsW()); + __ Mov(temp_source, value.W()); + GetAssembler()->PoisonHeapReference(temp_source); + source = temp_source; + } + + if (index.IsConstant()) { + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); + destination = HeapOperand(array, offset); + } else { + Register temp_base = temps.AcquireSameSizeAs(array); + __ Add(temp_base, array, offset); + destination = HeapOperand(temp_base, + XRegisterFrom(index), + LSL, + DataType::SizeShift(value_type)); } - codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull()); + { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + __ Str(source, destination); - if (done.IsLinked()) { - __ Bind(&done); + if (can_value_be_null || !needs_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (slow_path != nullptr) { @@ -2693,16 +2774,59 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); + + // If both index and length are constant, we can check the bounds statically and + // generate code accordingly. We want to make sure we generate constant locations + // in that case, regardless of whether they are encodable in the comparison or not. + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + bool both_const = index->IsConstant() && length->IsConstant(); + locations->SetInAt(0, both_const + ? Location::ConstantLocation(index->AsConstant()) + : ARM64EncodableConstantOrRegister(index, instruction)); + locations->SetInAt(1, both_const + ? 
Location::ConstantLocation(length->AsConstant()) + : ARM64EncodableConstantOrRegister(length, instruction)); } void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + int cmp_first_input = 0; + int cmp_second_input = 1; + Condition cond = hs; + + if (index_loc.IsConstant()) { + int64_t index = Int64FromLocation(index_loc); + if (length_loc.IsConstant()) { + int64_t length = Int64FromLocation(length_loc); + if (index < 0 || index >= length) { + BoundsCheckSlowPathARM64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + } else { + // BCE will remove the bounds check if we are guaranteed to pass. + // However, some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } + // Only the index is constant: change the order of the operands and commute the condition + // so we can use an immediate constant for the index (only the second input to a cmp + // instruction can be an immediate). + cmp_first_input = 1; + cmp_second_input = 0; + cond = ls; + } BoundsCheckSlowPathARM64* slow_path = new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); + __ Cmp(InputRegisterAt(instruction, cmp_first_input), + InputOperandAt(instruction, cmp_second_input)); codegen_->AddSlowPath(slow_path); - __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); - __ B(slow_path->GetEntryLabel(), hs); + __ B(slow_path->GetEntryLabel(), cond); } void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { @@ -2730,7 +2854,7 @@ static bool IsFloatingPointZeroConstant(HInstruction* inst) { } void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) { - FPRegister lhs_reg = InputFPRegisterAt(instruction, 0); + VRegister lhs_reg = InputFPRegisterAt(instruction, 0); Location rhs_loc = instruction->GetLocations()->InAt(1); if (rhs_loc.IsConstant()) { // 0.0 is the only immediate that can be encoded directly in @@ -3102,15 +3226,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp1 = temps.AcquireX(); - Register temp2 = temps.AcquireX(); - __ Ldr(temp1, MemOperand(sp, 0)); - __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp2, temp2, 1); - __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3964,6 +4080,32 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { HandleInvoke(invoke); } +void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + Register klass) { + DCHECK_EQ(klass.GetCode(), 0u); + // We know the destination of an intrinsic, so no need to record inline + // caches. 
+ if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint64_t address = reinterpret_cast64<uint64_t>(cache); + vixl::aarch64::Label done; + __ Mov(x8, address); + __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value())); + // Fast path for a monomorphic cache. + __ Cmp(klass, x9); + __ B(eq, &done); + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } + } +} + void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. LocationSummary* locations = invoke->GetLocations(); @@ -3972,13 +4114,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); - // The register ip1 is required to be used for the hidden argument in - // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. - MacroAssembler* masm = GetVIXLAssembler(); - UseScratchRegisterScope scratch_scope(masm); - scratch_scope.Exclude(ip1); - __ Mov(ip1, invoke->GetDexMethodIndex()); - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); @@ -4003,6 +4138,17 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); + + // If we're compiling baseline, update the inline cache. + codegen_->MaybeGenerateInlineCacheCheck(invoke, temp); + + // The register ip1 is required to be used for the hidden argument in + // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope scratch_scope(masm); + scratch_scope.Exclude(ip1); + __ Mov(ip1, invoke->GetDexMethodIndex()); + __ Ldr(temp, MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( @@ -4079,7 +4225,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { - DCHECK(GetCompilerOptions().IsBootImage()); + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); // Add ADRP with its PC-relative method patch. vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); @@ -4108,6 +4254,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); + // All aligned loads are implicitly atomic consume operations on ARM64. 
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } @@ -4182,6 +4329,10 @@ void CodeGeneratorARM64::GenerateVirtualCall( // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); + + // If we're compiling baseline, update the inline cache. + MaybeGenerateInlineCacheCheck(invoke, temp); + // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -4217,14 +4368,14 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( uint32_t intrinsic_data, vixl::aarch64::Label* adrp_label) { return NewPcRelativePatch( - /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_); + /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( uint32_t boot_image_offset, vixl::aarch64::Label* adrp_label) { return NewPcRelativePatch( - /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); + /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( @@ -4270,6 +4421,15 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } +void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + DCHECK(!Runtime::Current()->UseJitCompilation()); + call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value()); + vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label; + __ bind(bl_label); + __ bl(static_cast<int64_t>(0)); // Placeholder, patched at link-time. +} + void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. 
if (Runtime::Current()->UseJitCompilation()) { @@ -4425,24 +4585,28 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + - boot_image_intrinsic_patches_.size() + + boot_image_other_patches_.size() + + call_entrypoint_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4450,6 +4614,11 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin type_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( string_bss_entry_patches_, linker_patches); + for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) { + DCHECK(info.target_dex_file == nullptr); + linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch( + info.label.GetLocation(), info.offset_or_index)); + } for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( info.label.GetLocation(), info.custom_data)); @@ -4458,7 +4627,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin } bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { - return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint || + patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || patch.GetType() == linker::LinkerPatch::Type::kCallRelative; } @@ -4478,6 +4648,14 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, } break; } + case linker::LinkerPatch::Type::kCallEntrypoint: { + Offset offset(patch.EntrypointOffset()); + assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0)); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value()); + } + break; + } case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { 
DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); @@ -4638,6 +4816,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); // Add ADRP with its PC-relative type patch. const DexFile& dex_file = cls->GetDexFile(); @@ -4673,6 +4853,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ + // All aligned loads are implicitly atomic consume operations on ARM64. codegen_->GenerateGcRootFieldLoad(cls, out_loc, temp, @@ -4812,7 +4993,8 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); @@ -4847,6 +5029,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD vixl::aarch64::Label* ldr_label = codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ + // All aligned loads are implicitly atomic consume operations on ARM64. codegen_->GenerateGcRootFieldLoad(load, out_loc, temp, @@ -5318,8 +5501,8 @@ void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { } case DataType::Type::kFloat32: case DataType::Type::kFloat64: { - FPRegister in_reg = InputFPRegisterAt(abs, 0); - FPRegister out_reg = OutputFPRegister(abs); + VRegister in_reg = InputFPRegisterAt(abs, 0); + VRegister out_reg = OutputFPRegister(abs); __ Fabs(out_reg, in_reg); break; } @@ -5351,7 +5534,21 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { locations->SetInAt(0, ARM64ReturnLocation(return_type)); } -void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) { + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put the return value in both + // floating point and core register. 
+ switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + __ Fmov(w0, s0); + break; + case DataType::Type::kFloat64: + __ Fmov(x0, d0); + break; + default: + break; + } + } codegen_->GenerateFrameExit(); } @@ -5411,6 +5608,15 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0)); +} + +void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ Mov(w0, instruction->GetFormat()->GetValue()); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( HUnresolvedInstanceFieldGet* instruction) { FieldAccessCallingConventionARM64 calling_convention; @@ -6227,12 +6433,20 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, CheckValidReg(holder_reg.GetCode()); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); - // If base_reg differs from holder_reg, the offset was too large and we must have emitted - // an explicit null check before the load. Otherwise, for implicit null checks, we need to - // null-check the holder as we do not necessarily do that check before going to the thunk. + // In the case of a field load (with relaxed semantic), if `base_reg` differs from + // `holder_reg`, the offset was too large and we must have emitted (during the construction + // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved + // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before + // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do + // not necessarily do that check before going to the thunk. + // + // In the case of a field load with load-acquire semantics (where `base_reg` always differs + // from `holder_reg`), we also need an explicit null check when implicit null checks are + // allowed, as we do not emit one before going to the thunk. vixl::aarch64::Label throw_npe_label; vixl::aarch64::Label* throw_npe = nullptr; - if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + if (GetCompilerOptions().GetImplicitNullChecks() && + (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) { throw_npe = &throw_npe_label; __ Cbz(holder_reg.W(), throw_npe); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index ada5742fc0..6b2c80529b 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -63,7 +63,7 @@ static const vixl::aarch64::Register kParameterCoreRegisters[] = { vixl::aarch64::x7 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); -static const vixl::aarch64::FPRegister kParameterFPRegisters[] = { +static const vixl::aarch64::VRegister kParameterFPRegisters[] = { vixl::aarch64::d0, vixl::aarch64::d1, vixl::aarch64::d2, @@ -111,7 +111,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers( ? 
vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), vixl::aarch64::x30.GetCode()); -const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister, +const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), vixl::aarch64::d15.GetCode()); @@ -162,7 +162,7 @@ static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = vixl::aarch64::x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] = +static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = { vixl::aarch64::d0, vixl::aarch64::d1, vixl::aarch64::d2, @@ -175,7 +175,7 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register, - vixl::aarch64::FPRegister> { + vixl::aarch64::VRegister> { public: static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); @@ -193,7 +193,7 @@ class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::R }; class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register, - vixl::aarch64::FPRegister> { + vixl::aarch64::VRegister> { public: InvokeDexCallingConvention() : CallingConvention(kParameterCoreRegisters, @@ -435,10 +435,14 @@ class CodeGeneratorARM64 : public CodeGenerator { return kArm64WordSize; } - size_t GetFloatingPointSpillSlotSize() const override { + size_t GetSlowPathFPWidth() const override { return GetGraph()->HasSIMD() - ? 2 * kArm64WordSize // 16 bytes == 2 arm64 words for each spill - : 1 * kArm64WordSize; // 8 bytes == 1 arm64 words for each spill + ? vixl::aarch64::kQRegSizeInBytes + : vixl::aarch64::kDRegSizeInBytes; + } + + size_t GetCalleePreservedFPWidth() const override { + return vixl::aarch64::kDRegSizeInBytes; } uintptr_t GetAddressOf(HBasicBlock* block) override { @@ -476,7 +480,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // requirements, etc.). This also facilitates our task as all other registers // can easily be mapped via to or from their type and index or code. static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1; - static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters; + static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfVRegisters; static constexpr int kNumberOfAllocatableRegisterPairs = 0; void DumpCoreRegister(std::ostream& stream, int reg) const override; @@ -629,6 +633,9 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); + // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT. + void EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset); + // Emit the CBNZ instruction for baker read barrier and record // the associated patch for AOT or slow path for JIT. 
void EmitBakerReadBarrierCbnz(uint32_t custom_data); @@ -770,6 +777,18 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) override; void GenerateExplicitNullCheck(HNullCheck* instruction) override; + void MaybeRecordImplicitNullCheck(HInstruction* instr) final { + // The function must be only called within special scopes + // (EmissionCheckScope, ExactAssemblyScope) which prevent generation of + // veneer/literal pools by VIXL assembler. + CHECK_EQ(GetVIXLAssembler()->ArePoolsBlocked(), true) + << "The function must only be called within EmissionCheckScope or ExactAssemblyScope"; + CodeGenerator::MaybeRecordImplicitNullCheck(instr); + } + + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass); + void MaybeIncrementHotness(bool is_frame_entry); + private: // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. @@ -887,12 +906,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64 move_resolver_; Arm64Assembler assembler_; - // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // Deduplication map for 64-bit literals, used for non-patchable method address or method code. - Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). + // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -904,11 +918,18 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_; + // Patch info for calls to entrypoint dispatch thunks. Used for slow paths. + ArenaDeque<PatchInfo<vixl::aarch64::Label>> call_entrypoint_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; + // Deduplication map for 32-bit literals, used for JIT for boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for JIT for method address or method code. + Uint64ToLiteralMap uint64_literals_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; // Patches for class literals in JIT compiled code. 
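The ARM64 changes above wire the new baseline inline caches into virtual and interface dispatch: after unpoisoning the receiver's class, MaybeGenerateInlineCacheCheck() compares it against the first entry of the call site's InlineCache and only calls the update entrypoint on a miss, so a monomorphic call site never leaves the fast path (the ARM32 code further down shows the concrete Cmp/B.eq/kQuickUpdateInlineCache sequence). Below is a minimal standalone C++ model of that control flow; the single-slot cache layout and the UpdateInlineCacheRuntime() stand-in are illustrative assumptions, not the actual ART definitions.

    // Hypothetical, simplified model of the inline-cache fast path; a real
    // InlineCache has several class slots plus megamorphic handling.
    #include <cassert>

    struct Class {};  // stand-in for mirror::Class

    struct InlineCache {
      // First entry corresponds to InlineCache::ClassesOffset() in the
      // generated code.
      const Class* classes_[1] = {nullptr};
    };

    // Stand-in for the kQuickUpdateInlineCache entrypoint: record the class
    // observed at this call site (slow path, taken only on a cache miss).
    void UpdateInlineCacheRuntime(InlineCache* cache, const Class* klass) {
      cache->classes_[0] = klass;
    }

    // Mirrors the emitted sequence: load classes_[0], compare with the
    // receiver's class, fall through on a hit, otherwise call the runtime.
    void MaybeUpdateInlineCache(InlineCache* cache, const Class* klass) {
      if (cache->classes_[0] == klass) {
        return;  // fast path: monomorphic call site, no runtime call
      }
      UpdateInlineCacheRuntime(cache, klass);
    }

    int main() {
      Class receiver_class;
      InlineCache cache;
      MaybeUpdateInlineCache(&cache, &receiver_class);  // miss -> runtime update
      MaybeUpdateInlineCache(&cache, &receiver_class);  // hit  -> fast path
      assert(cache.classes_[0] == &receiver_class);
      return 0;
    }

The check is skipped for intrinsified invokes and for AOT compilation, since in those cases there is either a known dispatch target or no ProfilingInfo to update.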
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 6469c6964a..3a2cf40f04 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -18,7 +18,7 @@ #include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" -#include "art_method.h" +#include "art_method-inl.h" #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" #include "class_table.h" @@ -34,6 +34,7 @@ #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" @@ -47,7 +48,6 @@ namespace vixl32 = vixl::aarch32; using namespace vixl32; // NOLINT(build/namespaces) using helpers::DRegisterFrom; -using helpers::DWARFReg; using helpers::HighRegisterFrom; using helpers::InputDRegisterAt; using helpers::InputOperandAt; @@ -69,6 +69,7 @@ using helpers::RegisterFrom; using helpers::SRegisterFrom; using helpers::Uint64ConstantFrom; +using vixl::EmissionCheckScope; using vixl::ExactAssemblyScope; using vixl::CodeBufferCheckScope; @@ -1856,16 +1857,17 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint32_literals_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2060,10 +2062,10 @@ InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph, void CodeGeneratorARMVIXL::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; - DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; - // There is no easy instruction to restore just the PC on thumb2. We spill and - // restore another arbitrary register. - core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister.GetCode()); + DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u) + << "At least the return address register must be saved"; + // 16-bit PUSH/POP (T1) can save/restore just the LR/PC. 
+ DCHECK(GetVIXLAssembler()->IsUsingT32()); fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take // a SRegister and the number of registers to save/restore after that SRegister. We @@ -2078,23 +2080,81 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() { } } -void CodeGeneratorARMVIXL::GenerateFrameEntry() { - bool skip_overflow_check = - IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); - DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Bind(&frame_entry_label_); - +void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); + static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong"); + if (!is_frame_entry) { + __ Push(vixl32::Register(kMethodRegister)); + GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); + } + // Load with zero extend to clear the high bits for integer overflow check. __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); __ Add(temp, temp, 1); + // Subtract one if the counter would overflow. + __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16)); __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + if (!is_frame_entry) { + __ Pop(vixl32::Register(kMethodRegister)); + } + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + uint32_t address = reinterpret_cast32<uint32_t>(info); + vixl::aarch32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + if (!is_frame_entry) { + __ Push(r4); // Will be used as temporary. For frame entry, r4 is always available. + } + __ Mov(r4, address); + __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Add(ip, ip, 1); + __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + if (!is_frame_entry) { + __ Pop(r4); + } + __ Lsls(ip, ip, 16); + __ B(ne, &done); + uint32_t entry_point_offset = + GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value(); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for + // alignment. + uint32_t core_spill_mask = + (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode()); + __ Push(RegisterList(core_spill_mask)); + __ Ldr(lr, MemOperand(tr, entry_point_offset)); + __ Blx(lr); + __ Pop(RegisterList(core_spill_mask)); + } else { + if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); + } + __ Ldr(lr, MemOperand(tr, entry_point_offset)); + __ Blx(lr); + } + __ Bind(&done); + } } +} + +void CodeGeneratorARMVIXL::GenerateFrameEntry() { + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Bind(&frame_entry_label_); if (HasEmptyFrame()) { // Ensure that the CFI opcode list is not empty. 
GetAssembler()->cfi().Nop(); + MaybeIncrementHotness(/* is_frame_entry= */ true); return; } @@ -2125,32 +2185,66 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ Push(RegisterList(core_spill_mask_)); - GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_)); - GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister), - 0, - core_spill_mask_, - kArmWordSize); - if (fpu_spill_mask_ != 0) { - uint32_t first = LeastSignificantBit(fpu_spill_mask_); - - // Check that list is contiguous. - DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); - - __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); - GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); - GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize); - } - - int adjust = GetFrameSize() - FrameEntrySpillSize(); - __ Sub(sp, sp, adjust); - GetAssembler()->cfi().AdjustCFAOffset(adjust); - - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); + uint32_t frame_size = GetFrameSize(); + uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); + uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); + if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) && + core_spills_offset <= 3u * kArmWordSize) { + // Do a single PUSH for core registers including the method and up to two + // filler registers. Then store the single FP spill if any. + // (The worst case is when the method is not required and we actually + // store 3 extra registers but they are stored in the same properly + // aligned 16-byte chunk where we're already writing anyway.) + DCHECK_EQ(kMethodRegister.GetCode(), 0u); + uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize); + DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_)); + __ Push(RegisterList(core_spill_mask_ | extra_regs)); + GetAssembler()->cfi().AdjustCFAOffset(frame_size); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister), + core_spills_offset, + core_spill_mask_, + kArmWordSize); + if (fpu_spill_mask_ != 0u) { + DCHECK(IsPowerOfTwo(fpu_spill_mask_)); + vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_)); + GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset); + GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset); + } + } else { + __ Push(RegisterList(core_spill_mask_)); + GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_)); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister), + /*offset=*/ 0, + core_spill_mask_, + kArmWordSize); + if (fpu_spill_mask_ != 0) { + uint32_t first = LeastSignificantBit(fpu_spill_mask_); + + // Check that list is contiguous. + DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); + + __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); + GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), + /*offset=*/ 0, + fpu_spill_mask_, + kArmWordSize); + } + + // Adjust SP and save the current method if we need it. 
Note that we do + // not save the method in HCurrentMethod, as the instruction might have + // been removed in the SSA graph. + if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) { + DCHECK_EQ(kMethodRegister.GetCode(), 0u); + __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize))); + GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset); + } else { + __ Sub(sp, sp, dchecked_integral_cast<int32_t>(fp_spills_offset)); + GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset); + if (RequiresCurrentMethod()) { + GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); + } + } } if (GetGraph()->HasShouldDeoptimizeFlag()) { @@ -2161,6 +2255,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } + MaybeIncrementHotness(/* is_frame_entry= */ true); MaybeGenerateMarkingRegisterCheck(/* code= */ 1); } @@ -2169,27 +2264,55 @@ void CodeGeneratorARMVIXL::GenerateFrameExit() { __ Bx(lr); return; } - GetAssembler()->cfi().RememberState(); - int adjust = GetFrameSize() - FrameEntrySpillSize(); - __ Add(sp, sp, adjust); - GetAssembler()->cfi().AdjustCFAOffset(-adjust); - if (fpu_spill_mask_ != 0) { - uint32_t first = LeastSignificantBit(fpu_spill_mask_); - // Check that list is contiguous. - DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); - - __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); - GetAssembler()->cfi().AdjustCFAOffset( - -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_)); - GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_); - } // Pop LR into PC to return. DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U); uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode; - __ Pop(RegisterList(pop_mask)); - GetAssembler()->cfi().RestoreState(); - GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); + + uint32_t frame_size = GetFrameSize(); + uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); + uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); + if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) && + // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests. + core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) { + // Load the FP spill if any and then do a single POP including the method + // and up to two filler registers. If we have no FP spills, this also has + // the advantage that we do not need to emit CFI directives. + if (fpu_spill_mask_ != 0u) { + DCHECK(IsPowerOfTwo(fpu_spill_mask_)); + vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_)); + GetAssembler()->cfi().RememberState(); + GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset); + GetAssembler()->cfi().Restore(DWARFReg(sreg)); + } + // Clobber registers r2-r4 as they are caller-save in ART managed ABI and + // never hold the return value. 
+ uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode(); + DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u); + DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask)); + __ Pop(RegisterList(pop_mask | extra_regs)); + if (fpu_spill_mask_ != 0u) { + GetAssembler()->cfi().RestoreState(); + } + } else { + GetAssembler()->cfi().RememberState(); + __ Add(sp, sp, fp_spills_offset); + GetAssembler()->cfi().AdjustCFAOffset(-dchecked_integral_cast<int32_t>(fp_spills_offset)); + if (fpu_spill_mask_ != 0) { + uint32_t first = LeastSignificantBit(fpu_spill_mask_); + + // Check that list is contiguous. + DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_))); + + __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); + GetAssembler()->cfi().AdjustCFAOffset( + -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_)); + GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_); + } + __ Pop(RegisterList(pop_mask)); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); + } } void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { @@ -2383,15 +2506,31 @@ void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); - __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value())); - // Ensure the pc position is recorded immediately after the `blx` instruction. - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); - if (EntrypointRequiresStackMap(entrypoint)) { - RecordPcInfo(instruction, dex_pc, slow_path); + + ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint); + // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the + // entire oat file. This adds an extra branch and we do not want to slow down the main path. + // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative. + if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) { + __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value())); + // Ensure the pc position is recorded immediately after the `blx` instruction. + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. + ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ blx(lr); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } + } else { + // Ensure the pc position is recorded immediately after the `bl` instruction. 
+ ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k32BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + EmitEntrypointThunkCall(entrypoint_offset); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } } } @@ -2414,16 +2553,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Push(vixl32::Register(kMethodRegister)); - GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); - __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp, temp, 1); - __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Pop(vixl32::Register(kMethodRegister)); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3124,7 +3254,21 @@ void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) { locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType())); } -void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) { + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put the return value in both + // floating point and core registers. + switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + __ Vmov(r0, s0); + break; + case DataType::Type::kFloat64: + __ Vmov(r0, r1, d0); + break; + default: + break; + } + } codegen_->GenerateFrameExit(); } @@ -3211,6 +3355,34 @@ void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { invoke->GetLocations()->AddTemp(LocationFrom(r12)); } +void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + vixl32::Register klass) { + DCHECK_EQ(r0.GetCode(), klass.GetCode()); + // We know the destination of an intrinsic, so no need to record inline + // caches. + if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint32_t address = reinterpret_cast32<uint32_t>(cache); + vixl32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + __ Mov(r4, address); + __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value())); + // Fast path for a monomorphic cache. + __ Cmp(klass, ip); + __ B(eq, &done, /* is_far_target= */ false); + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } + } +} + void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
LocationSummary* locations = invoke->GetLocations(); @@ -3238,10 +3410,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + codegen_->MaybeGenerateInlineCacheCheck(invoke, temp); + GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value()); + uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( invoke->GetImtIndex(), kArmPointerSize)); // temp = temp->GetImtEntryAt(method_offset); @@ -5358,24 +5535,29 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); StoreOperandType operand_type = GetStoreOperandType(field_type); GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case DataType::Type::kReference: { + vixl32::Register value_reg = RegisterFrom(value); if (kPoisonHeapReferences && needs_write_barrier) { // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not // need poisoning. DCHECK_EQ(field_type, DataType::Type::kReference); - vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); - __ Mov(temp, RegisterFrom(value)); - GetAssembler()->PoisonHeapReference(temp); - GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset); - } else { - GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset); + value_reg = RegisterFrom(locations->GetTemp(0)); + __ Mov(value_reg, RegisterFrom(value)); + GetAssembler()->PoisonHeapReference(value_reg); } + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -5389,6 +5571,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, RegisterFrom(locations->GetTemp(1)), instruction); } else { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5396,7 +5580,10 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, } case DataType::Type::kFloat32: { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -5416,6 +5603,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, RegisterFrom(locations->GetTemp(3)), instruction); } else { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->StoreDToOffset(value_reg, base, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5429,16 +5618,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - // Longs and doubles are handled in the switch. - if (field_type != DataType::Type::kInt64 && field_type != DataType::Type::kFloat64) { - // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we - // should use a scope and the assembler to emit the store instruction to guarantee that we - // record the pc at the correct position. But the `Assembler` does not automatically handle - // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time - // of writing, do generate the store instruction last. - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); vixl32::Register card = RegisterFrom(locations->GetTemp(1)); @@ -5601,8 +5780,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); LoadOperandType operand_type = GetLoadOperandType(load_type); GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -5618,8 +5800,12 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } else { - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); + { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5631,26 +5817,34 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; } - case DataType::Type::kInt64: + case DataType::Type::kInt64: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out)); } else { GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; + } - case DataType::Type::kFloat32: + case DataType::Type::kFloat32: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); break; + } case DataType::Type::kFloat64: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); vixl32::DRegister out_dreg = DRegisterFrom(out); if (is_volatile && !atomic_ldrd_strd) { vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); vixl32::Register hi = RegisterFrom(locations->GetTemp(1)); GenerateWideAtomicLoad(base, offset, lo, hi); - // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a - // scope. codegen_->MaybeRecordImplicitNullCheck(instruction); __ Vmov(out_dreg, lo, hi); } else { @@ -5667,19 +5861,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - if (load_type == DataType::Type::kReference || load_type == DataType::Type::kFloat64) { - // Potential implicit null checks, in the case of reference or - // double fields, are handled in the previous switch statement. - } else { - // Address cases other than reference and double that may require an implicit null check. - // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we - // should use a scope and the assembler to emit the load instruction to guarantee that we - // record the pc at the correct position. But the `Assembler` does not automatically handle - // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time - // of writing, do generate the store instruction last. - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - if (is_volatile) { if (load_type == DataType::Type::kReference) { // Memory barriers, in the case of references, are also handled @@ -5722,6 +5903,15 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instr HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0)); +} + +void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ Mov(r0, instruction->GetFormat()->GetValue()); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet( HUnresolvedInstanceFieldGet* instruction) { FieldAccessCallingConventionARMVIXL calling_convention; @@ -5958,6 +6148,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { if (maybe_compressed_char_at) { length = RegisterFrom(locations->GetTemp(0)); uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5986,8 +6178,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } else { uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type)); + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); LoadOperandType load_type = GetLoadOperandType(type); GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } } else { UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -6020,7 +6215,10 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Bind(&done); } } else { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } } break; @@ -6060,15 +6258,13 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } else { vixl32::Register out = OutputRegister(instruction); if (index.IsConstant()) { - size_t offset = - (Int32ConstantFrom(index) << TIMES_4) + data_offset; - GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); - // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, - // we should use a scope and the assembler to emit the load instruction to guarantee that - // we record the pc at the correct position. But the `Assembler` does not automatically - // handle unencodable offsets. Practically, everything is fine because the helper and - // VIXL, at the time of writing, do generate the store instruction last. - codegen_->MaybeRecordImplicitNullCheck(instruction); + size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; + { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded // reference, if heap poisoning is enabled). @@ -6089,12 +6285,13 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } else { __ Add(temp, obj, data_offset); } - codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); - temps.Close(); - // TODO(VIXL): Use a scope to ensure that we record the pc position immediately after the - // load instruction. Practically, everything is fine because the helper and VIXL, at the - // time of writing, do generate the store instruction last. - codegen_->MaybeRecordImplicitNullCheck(instruction); + { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); + temps.Close(); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded // reference, if heap poisoning is enabled). @@ -6106,6 +6303,9 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } case DataType::Type::kInt64: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. 
+ EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; @@ -6116,10 +6316,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case DataType::Type::kFloat32: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); vixl32::SRegister out = SRegisterFrom(out_loc); if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; @@ -6130,10 +6334,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); GetAssembler()->LoadSFromOffset(out, temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case DataType::Type::kFloat64: { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset); @@ -6143,6 +6351,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -6152,15 +6361,6 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - - if (type == DataType::Type::kReference) { - // Potential implicit null checks, in the case of reference - // arrays, are handled in the previous switch statement. - } else if (!maybe_compressed_char_at) { - // TODO(VIXL): Use a scope to ensure we record the pc info immediately after - // the preceding load instruction. - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { @@ -6168,13 +6368,11 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); @@ -6195,7 +6393,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { vixl32::Register array = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); DataType::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t data_offset = @@ -6216,7 +6414,10 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(value_type)); StoreOperandType store_type = GetStoreOperandType(value_type); + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); @@ -6233,7 +6434,10 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } else { __ Add(temp, array, data_offset); } + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); + codegen_->MaybeRecordImplicitNullCheck(instruction); } break; } @@ -6245,10 +6449,12 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { DCHECK(!has_intermediate_address); if (instruction->InputAt(2)->IsNullConstant()) { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); // Just setting null. if (index.IsConstant()) { - size_t offset = - (Int32ConstantFrom(index) << TIMES_4) + data_offset; + size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); } else { DCHECK(index.IsRegister()) << index; @@ -6257,11 +6463,9 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { __ Add(temp, array, data_offset); codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); } - // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding - // store instruction. 
codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call_for_type_check); + DCHECK(!needs_type_check); break; } @@ -6270,36 +6474,21 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { vixl32::Register temp1 = RegisterFrom(temp1_loc); Location temp2_loc = locations->GetTemp(1); vixl32::Register temp2 = RegisterFrom(temp2_loc); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - vixl32::Label done; - vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); - SlowPathCodeARMVIXL* slow_path = nullptr; - if (may_need_runtime_call_for_type_check) { + bool can_value_be_null = instruction->GetValueCanBeNull(); + vixl32::Label do_store; + if (can_value_be_null) { + __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false); + } + + SlowPathCodeARMVIXL* slow_path = nullptr; + if (needs_type_check) { slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - vixl32::Label non_zero; - __ CompareAndBranchIfNonZero(value, &non_zero); - if (index.IsConstant()) { - size_t offset = - (Int32ConstantFrom(index) << TIMES_4) + data_offset; - GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); - } else { - DCHECK(index.IsRegister()) << index; - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Add(temp, array, data_offset); - codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); - } - // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding - // store instruction. - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ B(final_label); - __ Bind(&non_zero); - } + + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); // Note that when read barriers are enabled, the type checks // are performed without read barriers. This is fine, even in @@ -6346,6 +6535,13 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } } + codegen_->MarkGCCard(temp1, temp2, array, value, /* can_be_null= */ false); + + if (can_value_be_null) { + DCHECK(do_store.IsReferenced()); + __ Bind(&do_store); + } + vixl32::Register source = value; if (kPoisonHeapReferences) { // Note that in the case where `value` is a null reference, @@ -6357,32 +6553,28 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { source = temp1; } - if (index.IsConstant()) { - size_t offset = - (Int32ConstantFrom(index) << TIMES_4) + data_offset; - GetAssembler()->StoreToOffset(kStoreWord, source, array, offset); - } else { - DCHECK(index.IsRegister()) << index; - - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Add(temp, array, data_offset); - codegen_->StoreToShiftedRegOffset(value_type, - LocationFrom(source), - temp, - RegisterFrom(index)); - } - - if (!may_need_runtime_call_for_type_check) { - // TODO(VIXL): Ensure we record the pc position immediately after the preceding store - // instruction. 
- codegen_->MaybeRecordImplicitNullCheck(instruction); - } + { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); + if (index.IsConstant()) { + size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; + GetAssembler()->StoreToOffset(kStoreWord, source, array, offset); + } else { + DCHECK(index.IsRegister()) << index; - codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull()); + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, data_offset); + codegen_->StoreToShiftedRegOffset(value_type, + LocationFrom(source), + temp, + RegisterFrom(index)); + } - if (done.IsReferenced()) { - __ Bind(&done); + if (can_value_be_null || !needs_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (slow_path != nullptr) { @@ -6393,6 +6585,9 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } case DataType::Type::kInt64: { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); Location value = locations->InAt(2); if (index.IsConstant()) { size_t offset = @@ -6404,10 +6599,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case DataType::Type::kFloat32: { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); Location value = locations->InAt(2); DCHECK(value.IsFpuRegister()); if (index.IsConstant()) { @@ -6419,10 +6618,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case DataType::Type::kFloat64: { + // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. + // As two macro instructions can be emitted the max size is doubled. + EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes); Location value = locations->InAt(2); DCHECK(value.IsFpuRegisterPair()); if (index.IsConstant()) { @@ -6434,6 +6637,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -6443,13 +6647,6 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } - - // Objects are handled in the switch. 
- if (value_type != DataType::Type::kReference) { - // TODO(VIXL): Ensure we record the pc position immediately after the preceding store - // instruction. - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { @@ -7007,7 +7204,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); @@ -7026,6 +7224,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); + // All aligned loads are implicitly atomic consume operations on ARM. codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option); generate_null_check = true; break; @@ -7112,17 +7311,13 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position; - GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset); - __ Cmp(temp, shifted_initialized_value); + const size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset); + __ Cmp(temp, shifted_visibly_initialized_value); __ B(lo, slow_path->GetEntryLabel()); - // Even if the initialized flag is set, we may be in a situation where caches are not synced - // properly. Therefore, we do a memory fence. - __ Dmb(ISH); __ Bind(slow_path->GetExitLabel()); } @@ -7234,7 +7429,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); @@ -7252,6 +7448,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); + // All aligned loads are implicitly atomic consume operations on ARM. 
codegen_->GenerateGcRootFieldLoad( load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = @@ -8703,7 +8900,7 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { - DCHECK(GetCompilerOptions().IsBootImage()); + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod()); vixl32::Register temp_reg = RegisterFrom(temp); EmitMovwMovtPlaceholder(labels, temp_reg); @@ -8722,6 +8919,7 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); vixl32::Register temp_reg = RegisterFrom(temp); EmitMovwMovtPlaceholder(labels, temp_reg); + // All aligned loads are implicitly atomic consume operations on ARM. GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; } @@ -8799,6 +8997,9 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp); + // If we're compiling baseline, update the inline cache. + MaybeGenerateInlineCacheCheck(invoke, temp); + // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmPointerSize).Int32Value(); @@ -8819,14 +9020,14 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch( uint32_t intrinsic_data) { - return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_intrinsic_patches_); + return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( uint32_t boot_image_offset) { return NewPcRelativePatch(/* dex_file= */ nullptr, boot_image_offset, - &boot_image_method_patches_); + &boot_image_other_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( @@ -8867,6 +9068,17 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } +void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + DCHECK(!Runtime::Current()->UseJitCompilation()); + call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value()); + vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label; + __ bind(bl_label); + vixl32::Label placeholder_label; + __ bl(&placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); +} + void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) { DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. 
if (Runtime::Current()->UseJitCompilation()) { @@ -8988,24 +9200,28 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() + + call_entrypoint_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -9013,6 +9229,11 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l type_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( string_bss_entry_patches_, linker_patches); + for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) { + DCHECK(info.target_dex_file == nullptr); + linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch( + info.label.GetLocation(), info.offset_or_index)); + } for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( info.label.GetLocation(), info.custom_data)); @@ -9021,7 +9242,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l } bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const { - return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint || + patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || patch.GetType() == linker::LinkerPatch::Type::kCallRelative; } @@ -9030,23 +9252,30 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, /*out*/ std::string* debug_name) { arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator()); switch (patch.GetType()) { - case linker::LinkerPatch::Type::kCallRelative: + case linker::LinkerPatch::Type::kCallRelative: { // The thunk just uses the 
entry point in the ArtMethod. This works even for calls // to the generic JNI and interpreter trampolines. - assembler.LoadFromOffset( - arm::kLoadWord, - vixl32::pc, - vixl32::r0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); + assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value()); assembler.GetVIXLAssembler()->Bkpt(0); if (GetCompilerOptions().GenerateAnyDebugInfo()) { *debug_name = "MethodCallThunk"; } break; - case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + } + case linker::LinkerPatch::Type::kCallEntrypoint: { + assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset()); + assembler.GetVIXLAssembler()->Bkpt(0); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset()); + } + break; + } + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); break; + } default: LOG(FATAL) << "Unexpected patch type " << patch.GetType(); UNREACHABLE(); @@ -9357,9 +9586,12 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); - // If base_reg differs from holder_reg, the offset was too large and we must have emitted - // an explicit null check before the load. Otherwise, for implicit null checks, we need to - // null-check the holder as we do not necessarily do that check before going to the thunk. + // In the case of a field load, if `base_reg` differs from + // `holder_reg`, the offset was too large and we must have emitted (during the construction + // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved + // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before + // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do + // not necessarily do that check before going to the thunk. vixl32::Label throw_npe_label; vixl32::Label* throw_npe = nullptr; if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 5edca87147..48fb0827d7 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -76,8 +76,6 @@ static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRe static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0; -static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5; - // Callee saves core registers r5, r6, r7, r8 (except when emitting Baker // read barriers, where it is used as Marking Register), r10, r11, and lr. 
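The kCallEntrypoint thunk above pairs with EmitEntrypointThunkCall() from the .cc hunk: for AOT-compiled slow paths the call site emits a plain BL to a placeholder and records a CallEntrypointPatch, and the linker retargets that BL to a shared per-entrypoint thunk whose body is a single load into pc through the thread register. Roughly, combining the two hunks (simplified):

    // Call site: record a patch, then emit a BL to be retargeted at link time.
    call_entrypoint_patches_.emplace_back(/* dex_file= */ nullptr, entrypoint_offset.Uint32Value());
    __ bind(&call_entrypoint_patches_.back().label);
    __ bl(&placeholder_label);

    // Thunk body: tail-call the entrypoint stored in the Thread object.
    assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());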
static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union( @@ -448,7 +446,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { return static_cast<size_t>(kArmPointerSize); } - size_t GetFloatingPointSpillSlotSize() const override { return vixl::aarch32::kRegSizeInBytes; } + size_t GetCalleePreservedFPWidth() const override { + return vixl::aarch32::kSRegSizeInBytes; + } HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } @@ -589,6 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); + // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT. + void EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset); + // Emit the BNE instruction for baker read barrier and record // the associated patch for AOT or slow path for JIT. void EmitBakerReadBarrierBne(uint32_t custom_data); @@ -744,6 +747,18 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register in, vixl::aarch32::Register temp = vixl32::Register()); + void MaybeRecordImplicitNullCheck(HInstruction* instr) final { + // The function must be only be called within special scopes + // (EmissionCheckScope, ExactAssemblyScope) which prevent generation of + // veneer/literal pools by VIXL assembler. + CHECK_EQ(GetVIXLAssembler()->ArePoolsBlocked(), true) + << "The function must only be called within EmissionCheckScope or ExactAssemblyScope"; + CodeGenerator::MaybeRecordImplicitNullCheck(instr); + } + + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass); + void MaybeIncrementHotness(bool is_frame_entry); + private: // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. @@ -869,10 +884,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArmVIXLAssembler assembler_; - // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). + // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -884,11 +896,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_; + // Patch info for calls to entrypoint dispatch thunks. Used for slow paths. + ArenaDeque<PatchInfo<vixl::aarch32::Label>> call_entrypoint_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; + // Deduplication map for 32-bit literals, used for JIT for boot image addresses. + Uint32ToLiteralMap uint32_literals_; // Patches for string literals in JIT compiled code. 
StringToLiteralMap jit_string_patches_; // Patches for class literals in JIT compiled code. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc deleted file mode 100644 index 72334afa40..0000000000 --- a/compiler/optimizing/code_generator_mips.cc +++ /dev/null @@ -1,10224 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "code_generator_mips.h" - -#include "arch/mips/asm_support_mips.h" -#include "arch/mips/entrypoints_direct_mips.h" -#include "arch/mips/instruction_set_features_mips.h" -#include "art_method.h" -#include "class_table.h" -#include "code_generator_utils.h" -#include "compiled_method.h" -#include "entrypoints/quick/quick_entrypoints.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "gc/accounting/card_table.h" -#include "gc/space/image_space.h" -#include "heap_poisoning.h" -#include "intrinsics.h" -#include "intrinsics_mips.h" -#include "linker/linker_patch.h" -#include "mirror/array-inl.h" -#include "mirror/class-inl.h" -#include "offsets.h" -#include "stack_map_stream.h" -#include "thread.h" -#include "utils/assembler.h" -#include "utils/mips/assembler_mips.h" -#include "utils/stack_checks.h" - -namespace art { -namespace mips { - -static constexpr int kCurrentMethodStackOffset = 0; -static constexpr Register kMethodRegisterArgument = A0; - -// Flags controlling the use of thunks for Baker read barriers. 
-constexpr bool kBakerReadBarrierThunksEnableForFields = true; -constexpr bool kBakerReadBarrierThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; - -Location MipsReturnLocation(DataType::Type return_type) { - switch (return_type) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kUint32: - case DataType::Type::kInt32: - return Location::RegisterLocation(V0); - - case DataType::Type::kUint64: - case DataType::Type::kInt64: - return Location::RegisterPairLocation(V0, V1); - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - return Location::FpuRegisterLocation(F0); - - case DataType::Type::kVoid: - return Location(); - } - UNREACHABLE(); -} - -Location InvokeDexCallingConventionVisitorMIPS::GetReturnLocation(DataType::Type type) const { - return MipsReturnLocation(type); -} - -Location InvokeDexCallingConventionVisitorMIPS::GetMethodLocation() const { - return Location::RegisterLocation(kMethodRegisterArgument); -} - -Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type type) { - Location next_location; - - switch (type) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: { - uint32_t gp_index = gp_index_++; - if (gp_index < calling_convention.GetNumberOfRegisters()) { - next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index)); - } else { - size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = Location::StackSlot(stack_offset); - } - break; - } - - case DataType::Type::kInt64: { - uint32_t gp_index = gp_index_; - gp_index_ += 2; - if (gp_index + 1 < calling_convention.GetNumberOfRegisters()) { - Register reg = calling_convention.GetRegisterAt(gp_index); - if (reg == A1 || reg == A3) { - gp_index_++; // Skip A1(A3), and use A2_A3(T0_T1) instead. - gp_index++; - } - Register low_even = calling_convention.GetRegisterAt(gp_index); - Register high_odd = calling_convention.GetRegisterAt(gp_index + 1); - DCHECK_EQ(low_even + 1, high_odd); - next_location = Location::RegisterPairLocation(low_even, high_odd); - } else { - size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = Location::DoubleStackSlot(stack_offset); - } - break; - } - - // Note: both float and double types are stored in even FPU registers. On 32 bit FPU, double - // will take up the even/odd pair, while floats are stored in even regs only. - // On 64 bit FPU, both double and float are stored in even registers only. - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - uint32_t float_index = float_index_++; - if (float_index < calling_convention.GetNumberOfFpuRegisters()) { - next_location = Location::FpuRegisterLocation( - calling_convention.GetFpuRegisterAt(float_index)); - } else { - size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = DataType::Is64BitType(type) ? 
Location::DoubleStackSlot(stack_offset) - : Location::StackSlot(stack_offset); - } - break; - } - - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unexpected parameter type " << type; - UNREACHABLE(); - } - - // Space on the stack is reserved for all arguments. - stack_index_ += DataType::Is64BitType(type) ? 2 : 1; - - return next_location; -} - -Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) { - return MipsReturnLocation(type); -} - -static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { - InvokeRuntimeCallingConvention calling_convention; - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - // The reference is returned in the same register. This differs from the standard return location. - return caller_saves; -} - -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT -#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() - -class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - // We're moving two locations to locations that could overlap, so we need a parallel - // move resolver. - InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kInt32, - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kInt32); - QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() - ? 
kQuickThrowStringBounds - : kQuickThrowArrayBounds; - mips_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); - CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS); -}; - -class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS); -}; - -class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { - public: - LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at) - : SlowPathCodeMIPS(at), cls_(cls) { - DCHECK(at->IsLoadClass() || at->IsClinitCheck()); - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - Location out = locations->Out(); - const uint32_t dex_pc = instruction_->GetDexPc(); - bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); - bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); - - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; - if (must_resolve_type) { - DCHECK(IsSameDexFile(cls_->GetDexFile(), mips_codegen->GetGraph()->GetDexFile())); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - mips_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); - // If we also must_do_clinit, the resolved type is now in the correct register. - } else { - DCHECK(must_do_clinit); - Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); - mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - source, - cls_->GetType()); - } - if (must_do_clinit) { - mips_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); - } - - // Move the class to the desired location. - if (out.IsValid()) { - DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - DataType::Type type = instruction_->GetType(); - mips_codegen->MoveLocation(out, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - type); - } - RestoreLiveRegisters(codegen, locations); - - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "LoadClassSlowPathMIPS"; } - - private: - // The class this slow path will load. 
- HLoadClass* const cls_; - - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS); -}; - -class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit LoadStringSlowPathMIPS(HLoadString* instruction) - : SlowPathCodeMIPS(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - DCHECK(instruction_->IsLoadString()); - DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); - mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - - DataType::Type type = instruction_->GetType(); - mips_codegen->MoveLocation(locations->Out(), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - type); - RestoreLiveRegisters(codegen, locations); - - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "LoadStringSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); -}; - -class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - mips_codegen->InvokeRuntime(kQuickThrowNullPointer, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "NullCheckSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS); -}; - -class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { - public: - SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor) - : SlowPathCodeMIPS(instruction), successor_(successor) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. - mips_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. 
- if (successor_ == nullptr) { - __ B(GetReturnLabel()); - } else { - __ B(mips_codegen->GetLabelOf(successor_)); - } - } - - MipsLabel* GetReturnLabel() { - DCHECK(successor_ == nullptr); - return &return_label_; - } - - const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS"; } - - HBasicBlock* GetSuccessor() const { - return successor_; - } - - private: - // If not null, the block to branch to after the suspend check. - HBasicBlock* const successor_; - - // If `successor_` is null, the label to branch to after the suspend check. - MipsLabel return_label_; - - DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS); -}; - -class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal) - : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - uint32_t dex_pc = instruction_->GetDexPc(); - DCHECK(instruction_->IsCheckCast() - || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - - __ Bind(GetEntryLabel()); - if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { - SaveLiveRegisters(codegen, locations); - } - - // We're moving two locations to locations that could overlap, so we need a parallel - // move resolver. - InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kReference); - if (instruction_->IsInstanceOf()) { - mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); - DataType::Type ret_type = instruction_->GetType(); - Location ret_loc = calling_convention.GetReturnLocation(ret_type); - mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - } else { - DCHECK(instruction_->IsCheckCast()); - mips_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); - } - - if (!is_fatal_) { - RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); - } - } - - const char* GetDescription() const override { return "TypeCheckSlowPathMIPS"; } - - bool IsFatal() const override { return is_fatal_; } - - private: - const bool is_fatal_; - - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS); -}; - -class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) - : SlowPathCodeMIPS(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - __ Bind(GetEntryLabel()); - LocationSummary* locations = instruction_->GetLocations(); - SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), - static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); - mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); - } - - const 
char* GetDescription() const override { return "DeoptimizationSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); -}; - -class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); - parallel_move.AddMove( - locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - nullptr); - parallel_move.AddMove( - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kInt32, - nullptr); - parallel_move.AddMove( - locations->InAt(2), - Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - DataType::Type::kReference, - nullptr); - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); - RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "ArraySetSlowPathMIPS"; } - - private: - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking (see -// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is for the GcRoot read barrier. 
-class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { - public: - ReadBarrierMarkSlowPathMIPS(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - Register ref_reg = ref_.AsRegister<Register>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsLoadClass() || - instruction_->IsLoadString() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - DCHECK((V0 <= ref_reg && ref_reg <= T7) || - (S2 <= ref_reg && ref_reg <= S7) || - (ref_reg == FP)) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in A0 and V0 respectively): - // - // A0 <- ref - // V0 <- ReadBarrierMark(A0) - // ref <- V0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9); - __ Jalr(entrypoint_.AsRegister<Register>()); - __ NopIfNoReordering(); - } else { - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); - // This runtime call does not require a stack map. - mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, - instruction_, - this, - /* direct= */ false); - } - __ B(GetExitLabel()); - } - - private: - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if already loaded. - const Location entrypoint_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS); -}; - -// Slow path marking an object reference `ref` during a read barrier, -// and if needed, atomically updating the field `obj.field` in the -// object `obj` holding this reference after marking (contrary to -// ReadBarrierMarkSlowPathMIPS above, which never tries to update -// `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). 
-class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { - public: - ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction, - Location ref, - Register obj, - Location field_offset, - Register temp1) - : SlowPathCodeMIPS(instruction), - ref_(ref), - obj_(obj), - field_offset_(field_offset), - temp1_(temp1) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const override { - return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS"; - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - Register ref_reg = ref_.AsRegister<Register>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - // This slow path is only used by the UnsafeCASObject intrinsic. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK(field_offset_.IsRegisterPair()) << field_offset_; - - __ Bind(GetEntryLabel()); - - // Save the old reference. - // Note that we cannot use AT or TMP to save the old reference, as those - // are used by the code that follows, but we need the old reference after - // the call to the ReadBarrierMarkRegX entry point. - DCHECK_NE(temp1_, AT); - DCHECK_NE(temp1_, TMP); - __ Move(temp1_, ref_reg); - - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - DCHECK((V0 <= ref_reg && ref_reg <= T7) || - (S2 <= ref_reg && ref_reg <= S7) || - (ref_reg == FP)) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in A0 and V0 respectively): - // - // A0 <- ref - // V0 <- ReadBarrierMark(A0) - // ref <- V0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); - // This runtime call does not require a stack map. - mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, - instruction_, - this, - /* direct= */ false); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset_)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // the compare-and-set (CAS) loop below would abort, leaving the - // field as-is. - MipsLabel done; - __ Beq(temp1_, ref_reg, &done); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - // Convenience aliases. 
- Register base = obj_; - // The UnsafeCASObject intrinsic uses a register pair as field - // offset ("long offset"), of which only the low part contains - // data. - Register offset = field_offset_.AsRegisterPairLow<Register>(); - Register expected = temp1_; - Register value = ref_reg; - Register tmp_ptr = TMP; // Pointer to actual memory. - Register tmp = AT; // Value in memory. - - __ Addu(tmp_ptr, base, offset); - - if (kPoisonHeapReferences) { - __ PoisonHeapReference(expected); - // Do not poison `value` if it is the same register as - // `expected`, which has just been poisoned. - if (value != expected) { - __ PoisonHeapReference(value); - } - } - - // do { - // tmp = [r_ptr] - expected; - // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - - bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - MipsLabel loop_head, exit_loop; - __ Bind(&loop_head); - if (is_r6) { - __ LlR6(tmp, tmp_ptr); - } else { - __ LlR2(tmp, tmp_ptr); - } - __ Bne(tmp, expected, &exit_loop); - __ Move(tmp, value); - if (is_r6) { - __ ScR6(tmp, tmp_ptr); - } else { - __ ScR2(tmp, tmp_ptr); - } - __ Beqz(tmp, &loop_head); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - __ UnpoisonHeapReference(expected); - // Do not unpoison `value` if it is the same register as - // `expected`, which has just been unpoisoned. - if (value != expected) { - __ UnpoisonHeapReference(value); - } - } - - __ Bind(&done); - __ B(GetExitLabel()); - } - - private: - // The location (register) of the marked object reference. - const Location ref_; - // The register containing the object holding the marked object reference field. - const Register obj_; - // The location of the offset of the marked reference field within `obj_`. - Location field_offset_; - - const Register temp1_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS); -}; - -// Slow path generating a read barrier for a heap reference. -class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { - public: - ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) - : SlowPathCodeMIPS(instruction), - out_(out), - ref_(ref), - obj_(obj), - offset_(offset), - index_(index) { - DCHECK(kEmitCompilerReadBarrier); - // If `obj` is equal to `out` or `ref`, it means the initial object - // has been overwritten by (or after) the heap object reference load - // to be instrumented, e.g.: - // - // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); - // - // In that case, we have lost the information about the original - // object, and the emitted read barrier cannot work properly. 
- DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; - DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; - } - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - LocationSummary* locations = instruction_->GetLocations(); - Register reg_out = out_.AsRegister<Register>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier for heap reference slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - // We may have to change the index's value, but as `index_` is a - // constant member (like other "inputs" of this slow path), - // introduce a copy of it, `index`. - Location index = index_; - if (index_.IsValid()) { - // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. - if (instruction_->IsArrayGet()) { - // Compute the actual memory offset and store it in `index`. - Register index_reg = index_.AsRegister<Register>(); - DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); - if (codegen->IsCoreCalleeSaveRegister(index_reg)) { - // We are about to change the value of `index_reg` (see the - // calls to art::mips::MipsAssembler::Sll and - // art::mips::MipsAssembler::Addiu32 below), but it has - // not been saved by the previous call to - // art::SlowPathCode::SaveLiveRegisters, as it is a - // callee-save register -- - // art::SlowPathCode::SaveLiveRegisters does not consider - // callee-save registers, as it has been designed with the - // assumption that callee-save registers are supposed to be - // handled by the called function. So, as a callee-save - // register, `index_reg` _would_ eventually be saved onto - // the stack, but it would be too late: we would have - // changed its value earlier. Therefore, we manually save - // it here into another freely available register, - // `free_reg`, chosen of course among the caller-save - // registers (as a callee-save `free_reg` register would - // exhibit the same problem). - // - // Note we could have requested a temporary register from - // the register allocator instead; but we prefer not to, as - // this is a slow path, and we know we can find a - // caller-save register that is available. - Register free_reg = FindAvailableCallerSaveRegister(codegen); - __ Move(free_reg, index_reg); - index_reg = free_reg; - index = Location::RegisterLocation(index_reg); - } else { - // The initial register stored in `index_` has already been - // saved in the call to art::SlowPathCode::SaveLiveRegisters - // (as it is not a callee-save register), so we can freely - // use it. - } - // Shifting the index value contained in `index_reg` by the scale - // factor (2) cannot overflow in practice, as the runtime is - // unable to allocate object arrays with a size larger than - // 2^26 - 1 (that is, 2^28 - 4 bytes). 
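The bound quoted above is what makes the scaling below safe in 32-bit arithmetic: with at most 2^26 - 1 elements, (2^26 - 1) << 2 is at most 2^28 - 4, and adding the small data offset keeps the result well below 2^31, so neither the Sll nor the Addiu32 can overflow.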
- __ Sll(index_reg, index_reg, TIMES_4); - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - __ Addiu32(index_reg, index_reg, offset_); - } else { - // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile - // intrinsics, `index_` is not shifted by a scale factor of 2 - // (as in the case of ArrayGet), as it is actually an offset - // to an object field within an object. - DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) - << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); - DCHECK(index_.IsRegisterPair()); - // UnsafeGet's offset location is a register pair, the low - // part contains the correct offset. - index = index_.ToLow(); - } - } - - // We're moving two or three locations to locations that could - // overlap, so we need a parallel move resolver. - InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); - parallel_move.AddMove(ref_, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - nullptr); - parallel_move.AddMove(obj_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kReference, - nullptr); - if (index.IsValid()) { - parallel_move.AddMove(index, - Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - DataType::Type::kInt32, - nullptr); - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - } else { - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); - } - mips_codegen->InvokeRuntime(kQuickReadBarrierSlow, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes< - kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); - mips_codegen->MoveLocation(out_, - calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference); - - RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } - - private: - Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { - size_t ref = static_cast<int>(ref_.AsRegister<Register>()); - size_t obj = static_cast<int>(obj_.AsRegister<Register>()); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (i != ref && - i != obj && - !codegen->IsCoreCalleeSaveRegister(i) && - !codegen->IsBlockedCoreRegister(i)) { - return static_cast<Register>(i); - } - } - // We shall never fail to find a free caller-save register, as - // there are more than two core caller-save registers on MIPS - // (meaning it is possible to find one which is different from - // `ref` and `obj`). - DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); - LOG(FATAL) << "Could not find a free caller-save register"; - UNREACHABLE(); - } - - const Location out_; - const Location ref_; - const Location obj_; - const uint32_t offset_; - // An additional location containing an index to an array. - // Only used for HArrayGet and the UnsafeGetObject & - // UnsafeGetObjectVolatile intrinsics. 
- const Location index_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS); -}; - -// Slow path generating a read barrier for a GC root. -class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { - public: - ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root) - : SlowPathCodeMIPS(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - Register reg_out = out_.AsRegister<Register>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier for GC root slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - root_, - DataType::Type::kReference); - mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); - mips_codegen->MoveLocation(out_, - calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference); - - RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS"; } - - private: - const Location out_; - const Location root_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS); -}; - -CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) - : CodeGenerator(graph, - kNumberOfCoreRegisters, - kNumberOfFRegisters, - kNumberOfRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), - compiler_options, - stats), - block_labels_(nullptr), - location_builder_(graph, this), - instruction_visitor_(graph, this), - move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), - compiler_options.GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - clobbered_ra_(false) { - // Save RA (containing the return address) to mimic Quick. 
- AddAllocatedRegister(Location::RegisterLocation(RA)); -} - -#undef __ -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. -#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT -#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() - -void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { - // Ensure that we fix up branches. - __ FinalizeCode(); - - // Adjust native pc offsets in stack maps. - StackMapStream* stack_map_stream = GetStackMapStream(); - for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); - uint32_t new_position = __ GetAdjustedPosition(old_position); - DCHECK_GE(new_position, old_position); - stack_map_stream->SetStackMapNativePcOffset(i, new_position); - } - - // Adjust pc offsets for the disassembly information. - if (disasm_info_ != nullptr) { - GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); - frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); - frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); - for (auto& it : *disasm_info_->GetInstructionIntervals()) { - it.second.start = __ GetAdjustedPosition(it.second.start); - it.second.end = __ GetAdjustedPosition(it.second.end); - } - for (auto& it : *disasm_info_->GetSlowPathIntervals()) { - it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); - it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); - } - } - - CodeGenerator::Finalize(allocator); -} - -MipsAssembler* ParallelMoveResolverMIPS::GetAssembler() const { - return codegen_->GetAssembler(); -} - -void ParallelMoveResolverMIPS::EmitMove(size_t index) { - DCHECK_LT(index, moves_.size()); - MoveOperands* move = moves_[index]; - codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); -} - -void ParallelMoveResolverMIPS::EmitSwap(size_t index) { - DCHECK_LT(index, moves_.size()); - MoveOperands* move = moves_[index]; - DataType::Type type = move->GetType(); - Location loc1 = move->GetDestination(); - Location loc2 = move->GetSource(); - - DCHECK(!loc1.IsConstant()); - DCHECK(!loc2.IsConstant()); - - if (loc1.Equals(loc2)) { - return; - } - - if (loc1.IsRegister() && loc2.IsRegister()) { - // Swap 2 GPRs. - Register r1 = loc1.AsRegister<Register>(); - Register r2 = loc2.AsRegister<Register>(); - __ Move(TMP, r2); - __ Move(r2, r1); - __ Move(r1, TMP); - } else if (loc1.IsFpuRegister() && loc2.IsFpuRegister()) { - if (codegen_->GetGraph()->HasSIMD()) { - __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1)); - __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2)); - __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP)); - } else { - FRegister f1 = loc1.AsFpuRegister<FRegister>(); - FRegister f2 = loc2.AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ MovS(FTMP, f2); - __ MovS(f2, f1); - __ MovS(f1, FTMP); - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - __ MovD(FTMP, f2); - __ MovD(f2, f1); - __ MovD(f1, FTMP); - } - } - } else if ((loc1.IsRegister() && loc2.IsFpuRegister()) || - (loc1.IsFpuRegister() && loc2.IsRegister())) { - // Swap FPR and GPR. - DCHECK_EQ(type, DataType::Type::kFloat32); // Can only swap a float. - FRegister f1 = loc1.IsFpuRegister() ? 
loc1.AsFpuRegister<FRegister>() - : loc2.AsFpuRegister<FRegister>(); - Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>(); - __ Move(TMP, r2); - __ Mfc1(r2, f1); - __ Mtc1(TMP, f1); - } else if (loc1.IsRegisterPair() && loc2.IsRegisterPair()) { - // Swap 2 GPR register pairs. - Register r1 = loc1.AsRegisterPairLow<Register>(); - Register r2 = loc2.AsRegisterPairLow<Register>(); - __ Move(TMP, r2); - __ Move(r2, r1); - __ Move(r1, TMP); - r1 = loc1.AsRegisterPairHigh<Register>(); - r2 = loc2.AsRegisterPairHigh<Register>(); - __ Move(TMP, r2); - __ Move(r2, r1); - __ Move(r1, TMP); - } else if ((loc1.IsRegisterPair() && loc2.IsFpuRegister()) || - (loc1.IsFpuRegister() && loc2.IsRegisterPair())) { - // Swap FPR and GPR register pair. - DCHECK_EQ(type, DataType::Type::kFloat64); - FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>() - : loc2.AsFpuRegister<FRegister>(); - Register r2_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>() - : loc2.AsRegisterPairLow<Register>(); - Register r2_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>() - : loc2.AsRegisterPairHigh<Register>(); - // Use 2 temporary registers because we can't first swap the low 32 bits of an FPR and - // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR - // unpredictable and the following mfch1 will fail. - __ Mfc1(TMP, f1); - __ MoveFromFpuHigh(AT, f1); - __ Mtc1(r2_l, f1); - __ MoveToFpuHigh(r2_h, f1); - __ Move(r2_l, TMP); - __ Move(r2_h, AT); - } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ false); - } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ true); - } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) { - ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); - } else if ((loc1.IsRegister() && loc2.IsStackSlot()) || - (loc1.IsStackSlot() && loc2.IsRegister())) { - Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>(); - intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex(); - __ Move(TMP, reg); - __ LoadFromOffset(kLoadWord, reg, SP, offset); - __ StoreToOffset(kStoreWord, TMP, SP, offset); - } else if ((loc1.IsRegisterPair() && loc2.IsDoubleStackSlot()) || - (loc1.IsDoubleStackSlot() && loc2.IsRegisterPair())) { - Register reg_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>() - : loc2.AsRegisterPairLow<Register>(); - Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>() - : loc2.AsRegisterPairHigh<Register>(); - intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex(); - intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize) - : loc2.GetHighStackIndex(kMipsWordSize); - __ Move(TMP, reg_l); - __ LoadFromOffset(kLoadWord, reg_l, SP, offset_l); - __ StoreToOffset(kStoreWord, TMP, SP, offset_l); - __ Move(TMP, reg_h); - __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h); - __ StoreToOffset(kStoreWord, TMP, SP, offset_h); - } else if ((loc1.IsFpuRegister() && loc2.IsSIMDStackSlot()) || - (loc1.IsSIMDStackSlot() && loc2.IsFpuRegister())) { - Location fp_loc = loc1.IsFpuRegister() ? loc1 : loc2; - intptr_t offset = loc1.IsFpuRegister() ? 
loc2.GetStackIndex() : loc1.GetStackIndex(); - __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(fp_loc)); - __ LoadQFromOffset(fp_loc.AsFpuRegister<FRegister>(), SP, offset); - __ StoreQToOffset(FTMP, SP, offset); - } else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) { - FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>() - : loc2.AsFpuRegister<FRegister>(); - intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex(); - if (type == DataType::Type::kFloat32) { - __ MovS(FTMP, reg); - __ LoadSFromOffset(reg, SP, offset); - __ StoreSToOffset(FTMP, SP, offset); - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - __ MovD(FTMP, reg); - __ LoadDFromOffset(reg, SP, offset); - __ StoreDToOffset(FTMP, SP, offset); - } - } else { - LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported"; - } -} - -void ParallelMoveResolverMIPS::RestoreScratch(int reg) { - __ Pop(static_cast<Register>(reg)); -} - -void ParallelMoveResolverMIPS::SpillScratch(int reg) { - __ Push(static_cast<Register>(reg)); -} - -void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot) { - // Allocate a scratch register other than TMP, if available. - // Else, spill V0 (arbitrary choice) and use it as a scratch register (it will be - // automatically unspilled when the scratch scope object is destroyed). - ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters()); - // If V0 spills onto the stack, SP-relative offsets need to be adjusted. - int stack_offset = ensure_scratch.IsSpilled() ? kStackAlignment : 0; - for (int i = 0; i <= (double_slot ? 1 : 0); i++, stack_offset += kMipsWordSize) { - __ LoadFromOffset(kLoadWord, - Register(ensure_scratch.GetRegister()), - SP, - index1 + stack_offset); - __ LoadFromOffset(kLoadWord, - TMP, - SP, - index2 + stack_offset); - __ StoreToOffset(kStoreWord, - Register(ensure_scratch.GetRegister()), - SP, - index2 + stack_offset); - __ StoreToOffset(kStoreWord, TMP, SP, index1 + stack_offset); - } -} - -void ParallelMoveResolverMIPS::ExchangeQuadSlots(int index1, int index2) { - __ LoadQFromOffset(FTMP, SP, index1); - __ LoadQFromOffset(FTMP2, SP, index2); - __ StoreQToOffset(FTMP, SP, index2); - __ StoreQToOffset(FTMP2, SP, index1); -} - -void CodeGeneratorMIPS::ComputeSpillMask() { - core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; - fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; - DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; - // If there're FPU callee-saved registers and there's an odd number of GPR callee-saved - // registers, include the ZERO register to force alignment of FPU callee-saved registers - // within the stack frame. - if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) { - core_spill_mask_ |= (1 << ZERO); - } -} - -bool CodeGeneratorMIPS::HasAllocatedCalleeSaveRegisters() const { - // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register - // (this can happen in leaf methods), force CodeGenerator::InitializeCodeGeneration() - // into the path that creates a stack frame so that RA can be explicitly saved and restored. - // RA can't otherwise be saved/restored when it's the only spilled register. 
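ComputeSpillMask above keeps the 64-bit FPU spill area aligned by adding the ZERO register to the core mask whenever FPU callee-saves are present and the number of core spills is odd. A compact sketch of that padding rule, with popcount written out by hand and the dummy register passed as a plain number:

  #include <cstdint>

  // Illustrative sketch: pad the 32-bit core spill mask with a dummy register
  // so that the 64-bit FPU spills that follow stay 8-byte aligned.
  static int PopCount(uint32_t v) {
    int n = 0;
    for (; v != 0u; v &= v - 1u) ++n;  // Clear the lowest set bit each round.
    return n;
  }

  uint32_t PadCoreSpillMask(uint32_t core_mask, uint32_t fpu_mask, int dummy_reg) {
    if (fpu_mask != 0u && (PopCount(core_mask) % 2) != 0) {
      core_mask |= 1u << dummy_reg;  // Spill one extra word purely for alignment.
    }
    return core_mask;
  }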
- return CodeGenerator::HasAllocatedCalleeSaveRegisters() || clobbered_ra_; -} - -static dwarf::Reg DWARFReg(Register reg) { - return dwarf::Reg::MipsCore(static_cast<int>(reg)); -} - -// TODO: mapping of floating-point registers to DWARF. - -void CodeGeneratorMIPS::GenerateFrameEntry() { - __ Bind(&frame_entry_label_); - - if (GetCompilerOptions().CountHotnessInCompiledCode()) { - __ Lhu(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); - __ Addiu(TMP, TMP, 1); - __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); - } - - bool do_overflow_check = - FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod(); - - if (do_overflow_check) { - __ LoadFromOffset(kLoadWord, - ZERO, - SP, - -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kMips))); - RecordPcInfo(nullptr, 0); - } - - if (HasEmptyFrame()) { - CHECK_EQ(fpu_spill_mask_, 0u); - CHECK_EQ(core_spill_mask_, 1u << RA); - CHECK(!clobbered_ra_); - return; - } - - // Make sure the frame size isn't unreasonably large. - if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kMips)) { - LOG(FATAL) << "Stack frame larger than " - << GetStackOverflowReservedBytes(InstructionSet::kMips) << " bytes"; - } - - // Spill callee-saved registers. - - uint32_t ofs = GetFrameSize(); - __ IncreaseFrameSize(ofs); - - for (uint32_t mask = core_spill_mask_; mask != 0; ) { - Register reg = static_cast<Register>(MostSignificantBit(mask)); - mask ^= 1u << reg; - ofs -= kMipsWordSize; - // The ZERO register is only included for alignment. - if (reg != ZERO) { - __ StoreToOffset(kStoreWord, reg, SP, ofs); - __ cfi().RelOffset(DWARFReg(reg), ofs); - } - } - - for (uint32_t mask = fpu_spill_mask_; mask != 0; ) { - FRegister reg = static_cast<FRegister>(MostSignificantBit(mask)); - mask ^= 1u << reg; - ofs -= kMipsDoublewordSize; - __ StoreDToOffset(reg, SP, ofs); - // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs); - } - - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); - } - - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should deoptimize flag to 0. - __ StoreToOffset(kStoreWord, ZERO, SP, GetStackOffsetOfShouldDeoptimizeFlag()); - } -} - -void CodeGeneratorMIPS::GenerateFrameExit() { - __ cfi().RememberState(); - - if (!HasEmptyFrame()) { - // Restore callee-saved registers. - - // For better instruction scheduling restore RA before other registers. - uint32_t ofs = GetFrameSize(); - for (uint32_t mask = core_spill_mask_; mask != 0; ) { - Register reg = static_cast<Register>(MostSignificantBit(mask)); - mask ^= 1u << reg; - ofs -= kMipsWordSize; - // The ZERO register is only included for alignment. - if (reg != ZERO) { - __ LoadFromOffset(kLoadWord, reg, SP, ofs); - __ cfi().Restore(DWARFReg(reg)); - } - } - - for (uint32_t mask = fpu_spill_mask_; mask != 0; ) { - FRegister reg = static_cast<FRegister>(MostSignificantBit(mask)); - mask ^= 1u << reg; - ofs -= kMipsDoublewordSize; - __ LoadDFromOffset(reg, SP, ofs); - // TODO: __ cfi().Restore(DWARFReg(reg)); - } - - size_t frame_size = GetFrameSize(); - // Adjust the stack pointer in the delay slot if doing so doesn't break CFI. 
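The frame entry and exit code above walks each spill mask from its most significant set bit downwards, handing out descending SP-relative offsets. A sketch of just that bookkeeping, detached from the assembler; the returned pair vector and the slot-size parameter are illustrative:

  #include <cstdint>
  #include <utility>
  #include <vector>

  // Illustrative sketch of the offset assignment in the frame entry/exit
  // loops: visit registers from the highest set bit down, each one taking the
  // next slot below the previous offset.
  static int MostSignificantBitIndex(uint32_t mask) {
    int msb = -1;
    for (int i = 0; i < 32; ++i) {
      if ((mask & (1u << i)) != 0u) msb = i;
    }
    return msb;
  }

  std::vector<std::pair<int, uint32_t>> AssignSpillOffsets(uint32_t mask,
                                                           uint32_t frame_size,
                                                           uint32_t slot_size) {
    std::vector<std::pair<int, uint32_t>> slots;  // (register number, SP offset)
    uint32_t ofs = frame_size;
    while (mask != 0u) {
      int reg = MostSignificantBitIndex(mask);
      mask ^= 1u << reg;
      ofs -= slot_size;
      slots.emplace_back(reg, ofs);
    }
    return slots;
  }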
- bool exchange = IsInt<16>(static_cast<int32_t>(frame_size)); - bool reordering = __ SetReorder(false); - if (exchange) { - __ Jr(RA); - __ DecreaseFrameSize(frame_size); // Single instruction in delay slot. - } else { - __ DecreaseFrameSize(frame_size); - __ Jr(RA); - __ Nop(); // In delay slot. - } - __ SetReorder(reordering); - } else { - __ Jr(RA); - __ NopIfNoReordering(); - } - - __ cfi().RestoreState(); - __ cfi().DefCFAOffset(GetFrameSize()); -} - -void CodeGeneratorMIPS::Bind(HBasicBlock* block) { - __ Bind(GetLabelOf(block)); -} - -VectorRegister VectorRegisterFrom(Location location) { - DCHECK(location.IsFpuRegister()); - return static_cast<VectorRegister>(location.AsFpuRegister<FRegister>()); -} - -void CodeGeneratorMIPS::MoveLocation(Location destination, - Location source, - DataType::Type dst_type) { - if (source.Equals(destination)) { - return; - } - - if (source.IsConstant()) { - MoveConstant(destination, source.GetConstant()); - } else { - if (destination.IsRegister()) { - if (source.IsRegister()) { - __ Move(destination.AsRegister<Register>(), source.AsRegister<Register>()); - } else if (source.IsFpuRegister()) { - __ Mfc1(destination.AsRegister<Register>(), source.AsFpuRegister<FRegister>()); - } else { - DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination; - __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex()); - } - } else if (destination.IsRegisterPair()) { - if (source.IsRegisterPair()) { - __ Move(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); - __ Move(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); - } else if (source.IsFpuRegister()) { - Register dst_high = destination.AsRegisterPairHigh<Register>(); - Register dst_low = destination.AsRegisterPairLow<Register>(); - FRegister src = source.AsFpuRegister<FRegister>(); - __ Mfc1(dst_low, src); - __ MoveFromFpuHigh(dst_high, src); - } else { - DCHECK(source.IsDoubleStackSlot()) - << "Cannot move from " << source << " to " << destination; - int32_t off = source.GetStackIndex(); - Register r = destination.AsRegisterPairLow<Register>(); - __ LoadFromOffset(kLoadDoubleword, r, SP, off); - } - } else if (destination.IsFpuRegister()) { - if (source.IsRegister()) { - DCHECK(!DataType::Is64BitType(dst_type)); - __ Mtc1(source.AsRegister<Register>(), destination.AsFpuRegister<FRegister>()); - } else if (source.IsRegisterPair()) { - DCHECK(DataType::Is64BitType(dst_type)); - FRegister dst = destination.AsFpuRegister<FRegister>(); - Register src_high = source.AsRegisterPairHigh<Register>(); - Register src_low = source.AsRegisterPairLow<Register>(); - __ Mtc1(src_low, dst); - __ MoveToFpuHigh(src_high, dst); - } else if (source.IsFpuRegister()) { - if (GetGraph()->HasSIMD()) { - __ MoveV(VectorRegisterFrom(destination), - VectorRegisterFrom(source)); - } else { - if (DataType::Is64BitType(dst_type)) { - __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); - } else { - DCHECK_EQ(dst_type, DataType::Type::kFloat32); - __ MovS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); - } - } - } else if (source.IsSIMDStackSlot()) { - __ LoadQFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); - } else if (source.IsDoubleStackSlot()) { - DCHECK(DataType::Is64BitType(dst_type)); - __ LoadDFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); - } else { - 
DCHECK(!DataType::Is64BitType(dst_type)); - DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination; - __ LoadSFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); - } - } else if (destination.IsSIMDStackSlot()) { - if (source.IsFpuRegister()) { - __ StoreQToOffset(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); - } else { - DCHECK(source.IsSIMDStackSlot()); - __ LoadQFromOffset(FTMP, SP, source.GetStackIndex()); - __ StoreQToOffset(FTMP, SP, destination.GetStackIndex()); - } - } else if (destination.IsDoubleStackSlot()) { - int32_t dst_offset = destination.GetStackIndex(); - if (source.IsRegisterPair()) { - __ StoreToOffset(kStoreDoubleword, source.AsRegisterPairLow<Register>(), SP, dst_offset); - } else if (source.IsFpuRegister()) { - __ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset); - } else { - DCHECK(source.IsDoubleStackSlot()) - << "Cannot move from " << source << " to " << destination; - __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, TMP, SP, dst_offset); - __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4); - __ StoreToOffset(kStoreWord, TMP, SP, dst_offset + 4); - } - } else { - DCHECK(destination.IsStackSlot()) << destination; - int32_t dst_offset = destination.GetStackIndex(); - if (source.IsRegister()) { - __ StoreToOffset(kStoreWord, source.AsRegister<Register>(), SP, dst_offset); - } else if (source.IsFpuRegister()) { - __ StoreSToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset); - } else { - DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination; - __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, TMP, SP, dst_offset); - } - } - } -} - -void CodeGeneratorMIPS::MoveConstant(Location destination, HConstant* c) { - if (c->IsIntConstant() || c->IsNullConstant()) { - // Move 32 bit constant. - int32_t value = GetInt32ValueOf(c); - if (destination.IsRegister()) { - Register dst = destination.AsRegister<Register>(); - __ LoadConst32(dst, value); - } else { - DCHECK(destination.IsStackSlot()) - << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP); - } - } else if (c->IsLongConstant()) { - // Move 64 bit constant. - int64_t value = GetInt64ValueOf(c); - if (destination.IsRegisterPair()) { - Register r_h = destination.AsRegisterPairHigh<Register>(); - Register r_l = destination.AsRegisterPairLow<Register>(); - __ LoadConst64(r_h, r_l, value); - } else { - DCHECK(destination.IsDoubleStackSlot()) - << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP); - } - } else if (c->IsFloatConstant()) { - // Move 32 bit float constant. - int32_t value = GetInt32ValueOf(c); - if (destination.IsFpuRegister()) { - __ LoadSConst32(destination.AsFpuRegister<FRegister>(), value, TMP); - } else { - DCHECK(destination.IsStackSlot()) - << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConstToOffset(kStoreWord, value, SP, destination.GetStackIndex(), TMP); - } - } else { - // Move 64 bit double constant. 
- DCHECK(c->IsDoubleConstant()) << c->DebugName(); - int64_t value = GetInt64ValueOf(c); - if (destination.IsFpuRegister()) { - FRegister fd = destination.AsFpuRegister<FRegister>(); - __ LoadDConst64(fd, value, TMP); - } else { - DCHECK(destination.IsDoubleStackSlot()) - << "Cannot move " << c->DebugName() << " to " << destination; - __ StoreConstToOffset(kStoreDoubleword, value, SP, destination.GetStackIndex(), TMP); - } - } -} - -void CodeGeneratorMIPS::MoveConstant(Location destination, int32_t value) { - DCHECK(destination.IsRegister()); - Register dst = destination.AsRegister<Register>(); - __ LoadConst32(dst, value); -} - -void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* locations) { - if (location.IsRegister()) { - locations->AddTemp(location); - } else if (location.IsRegisterPair()) { - locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>())); - locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>())); - } else { - UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; - } -} - -template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> -inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( - const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<linker::LinkerPatch>* linker_patches) { - for (const PcRelativePatchInfo& info : infos) { - const DexFile* dex_file = info.target_dex_file; - size_t offset_or_index = info.offset_or_index; - DCHECK(info.label.IsBound()); - uint32_t literal_offset = __ GetLabelLocation(&info.label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative addressing. - const PcRelativePatchInfo& info_high = info.patch_info_high ? *info.patch_info_high : info; - uint32_t pc_rel_offset = info_high.pc_rel_label.IsBound() - ? __ GetLabelLocation(&info_high.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); - } -} - -template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> -linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t boot_image_offset) { - DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
- return Factory(literal_offset, pc_insn_offset, boot_image_offset); -} - -void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { - DCHECK(linker_patches->empty()); - size_t size = - boot_image_method_patches_.size() + - method_bss_entry_patches_.size() + - boot_image_type_patches_.size() + - type_bss_entry_patches_.size() + - boot_image_string_patches_.size() + - string_bss_entry_patches_.size() + - boot_image_intrinsic_patches_.size(); - linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - boot_image_method_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); - } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); - DCHECK(boot_image_type_patches_.empty()); - DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); - } - EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( - method_bss_entry_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( - type_bss_entry_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( - string_bss_entry_patches_, linker_patches); - DCHECK_EQ(size, linker_patches->size()); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageIntrinsicPatch( - uint32_t intrinsic_data, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( - uint32_t boot_image_offset, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( - MethodReference target_method, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch( - MethodReference target_method, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageTypePatch( - const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( - const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageStringPatch( - const DexFile& dex_file, - 
dex::StringIndex string_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - &dex_file, string_index.index_, info_high, &boot_image_string_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch( - const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); -} - -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( - const DexFile* dex_file, - uint32_t offset_or_index, - const PcRelativePatchInfo* info_high, - ArenaDeque<PcRelativePatchInfo>* patches) { - patches->emplace_back(dex_file, offset_or_index, info_high); - return &patches->back(); -} - -Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { - return map->GetOrCreate( - value, - [this, value]() { return __ NewLiteral<uint32_t>(value); }); -} - -Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); -} - -void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, - Register out, - Register base) { - DCHECK(!info_high->patch_info_high); - DCHECK_NE(out, base); - bool reordering = __ SetReorder(false); - if (GetInstructionSetFeatures().IsR6()) { - DCHECK_EQ(base, ZERO); - __ Bind(&info_high->label); - __ Bind(&info_high->pc_rel_label); - // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* imm16= */ 0x1234); - __ SetReorder(reordering); - } else { - // If base is ZERO, emit NAL to obtain the actual base. - if (base == ZERO) { - // Generate a dummy PC-relative call to obtain PC. - __ Nal(); - } - __ Bind(&info_high->label); - __ Lui(out, /* imm16= */ 0x1234); - // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding - // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. - if (base == ZERO) { - __ Bind(&info_high->pc_rel_label); - } - __ SetReorder(reordering); - // Add the high half of a 32-bit offset to PC. - __ Addu(out, out, (base == ZERO) ? RA : base); - } - // A following instruction will add the sign-extended low half of the 32-bit - // offset to `out` (e.g. lw, jialc, addiu). 
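EmitPcRelativeAddressPlaceholderHigh above emits only the high half of a 32-bit offset; the low half is added later as a sign-extended 16-bit immediate (lw, jialc, addiu), which is why the patching code in this file bumps the high half when bit 15 is set (the same idea as the `imm_low < 0` and `(addr32 & 0x8000) << 1` adjustments elsewhere in the diff). A sketch of that arithmetic in isolation:

  #include <cstdint>

  // Illustrative sketch of the %hi/%lo split behind the PC-relative sequences:
  // the low 16 bits are consumed as a sign-extended immediate, so the high
  // half is pre-incremented whenever bit 15 of the offset is set.
  struct HiLo {
    uint16_t hi;
    int16_t lo;
  };

  HiLo SplitHiLo(uint32_t offset) {
    HiLo parts;
    parts.lo = static_cast<int16_t>(offset & 0xFFFFu);
    parts.hi = static_cast<uint16_t>(offset >> 16);
    if (parts.lo < 0) {
      parts.hi += 1;  // Compensate for the sign extension of `lo`.
    }
    return parts;
  }

  // (hi << 16) + sign_extend(lo) reconstructs the original offset (mod 2^32).
  bool SplitRoundTrips(uint32_t offset) {
    HiLo parts = SplitHiLo(offset);
    uint32_t rebuilt = (static_cast<uint32_t>(parts.hi) << 16) +
                       static_cast<uint32_t>(static_cast<int32_t>(parts.lo));
    return rebuilt == offset;
  }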
-} - -void CodeGeneratorMIPS::LoadBootImageAddress(Register reg, uint32_t boot_image_reference) { - if (GetCompilerOptions().IsBootImage()) { - PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); - PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, /* base= */ ZERO); - __ Addiu(reg, TMP, /* imm16= */ 0x5678, &info_low->label); - } else if (GetCompilerOptions().GetCompilePic()) { - PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); - PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, reg, /* base= */ ZERO); - __ Lw(reg, reg, /* imm16= */ 0x5678, &info_low->label); - } else { - DCHECK(Runtime::Current()->UseJitCompilation()); - gc::Heap* heap = Runtime::Current()->GetHeap(); - DCHECK(!heap->GetBootImageSpaces().empty()); - const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; - __ LoadConst32(reg, dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))); - } -} - -void CodeGeneratorMIPS::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, - uint32_t boot_image_offset) { - DCHECK(invoke->IsStatic()); - InvokeRuntimeCallingConvention calling_convention; - Register argument = calling_convention.GetRegisterAt(0); - if (GetCompilerOptions().IsBootImage()) { - DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. - MethodReference target_method = invoke->GetTargetMethod(); - dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; - PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); - PcRelativePatchInfo* info_low = - NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, argument, /* base= */ ZERO); - __ Addiu(argument, argument, /* imm16= */ 0x5678, &info_low->label); - } else { - LoadBootImageAddress(argument, boot_image_offset); - } - InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); -} - -CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( - const DexFile& dex_file, - dex::StringIndex string_index, - Handle<mirror::String> handle) { - ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); - jit_string_patches_.emplace_back(dex_file, string_index.index_); - return &jit_string_patches_.back(); -} - -CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootClassPatch( - const DexFile& dex_file, - dex::TypeIndex type_index, - Handle<mirror::Class> handle) { - ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); - jit_class_patches_.emplace_back(dex_file, type_index.index_); - return &jit_class_patches_.back(); -} - -void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - const CodeGeneratorMIPS::JitPatchInfo& info, - uint64_t index_in_table) const { - uint32_t high_literal_offset = GetAssembler().GetLabelLocation(&info.high_label); - uint32_t low_literal_offset = GetAssembler().GetLabelLocation(&info.low_label); - uintptr_t address = - reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - uint32_t addr32 = 
dchecked_integral_cast<uint32_t>(address); - // lui reg, addr32_high - DCHECK_EQ(code[high_literal_offset + 0], 0x34); - DCHECK_EQ(code[high_literal_offset + 1], 0x12); - DCHECK_EQ((code[high_literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ(code[high_literal_offset + 3], 0x3C); - // instr reg, reg, addr32_low - DCHECK_EQ(code[low_literal_offset + 0], 0x78); - DCHECK_EQ(code[low_literal_offset + 1], 0x56); - addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". - // lui reg, addr32_high - code[high_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); - code[high_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); - // instr reg, reg, addr32_low - code[low_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 0); - code[low_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 8); -} - -void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { - for (const JitPatchInfo& info : jit_string_patches_) { - StringReference string_reference(&info.target_dex_file, dex::StringIndex(info.index)); - uint64_t index_in_table = GetJitStringRootIndex(string_reference); - PatchJitRootUse(code, roots_data, info, index_in_table); - } - for (const JitPatchInfo& info : jit_class_patches_) { - TypeReference type_reference(&info.target_dex_file, dex::TypeIndex(info.index)); - uint64_t index_in_table = GetJitClassRootIndex(type_reference); - PatchJitRootUse(code, roots_data, info, index_in_table); - } -} - -void CodeGeneratorMIPS::MarkGCCard(Register object, - Register value, - bool value_can_be_null) { - MipsLabel done; - Register card = AT; - Register temp = TMP; - if (value_can_be_null) { - __ Beqz(value, &done); - } - // Load the address of the card table into `card`. - __ LoadFromOffset(kLoadWord, - card, - TR, - Thread::CardTableOffset<kMipsPointerSize>().Int32Value()); - // Calculate the address of the card corresponding to `object`. - __ Srl(temp, object, gc::accounting::CardTable::kCardShift); - __ Addu(temp, card, temp); - // Write the `art::gc::accounting::CardTable::kCardDirty` value into the - // `object`'s card. - // - // Register `card` contains the address of the card table. Note that the card - // table's base is biased during its creation so that it always starts at an - // address whose least-significant byte is equal to `kCardDirty` (see - // art::gc::accounting::CardTable::Create). Therefore the SB instruction - // below writes the `kCardDirty` (byte) value into the `object`'s card - // (located at `card + object >> kCardShift`). - // - // This dual use of the value in register `card` (1. to calculate the location - // of the card to mark; and 2. to load the `kCardDirty` value) saves a load - // (no need to explicitly load `kCardDirty` as an immediate value). - __ Sb(card, temp, 0); - if (value_can_be_null) { - __ Bind(&done); - } -} - -void CodeGeneratorMIPS::SetupBlockedRegisters() const { - // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated. - blocked_core_registers_[ZERO] = true; - blocked_core_registers_[K0] = true; - blocked_core_registers_[K1] = true; - blocked_core_registers_[GP] = true; - blocked_core_registers_[SP] = true; - blocked_core_registers_[RA] = true; - - // AT and TMP(T8) are used as temporary/scratch registers - // (similar to how AT is used by MIPS assemblers). - blocked_core_registers_[AT] = true; - blocked_core_registers_[TMP] = true; - blocked_fpu_registers_[FTMP] = true; - - if (GetInstructionSetFeatures().HasMsa()) { - // To be used just for MSA instructions. 
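MarkGCCard above relies on the card table base being biased so that its own least significant byte equals kCardDirty, letting a single SB both address and mark the card. A simplified sketch of that addressing trick; the shift parameter stands in for the runtime's CardTable::kCardShift and is not the real constant:

  #include <cstdint>

  // Illustrative sketch of the biased card-table store: one byte store marks
  // the card covering `object_address`, and the dirty value is simply the low
  // byte of the biased table base, so no extra constant has to be loaded.
  void MarkCard(uint8_t* biased_card_table_base,
                uintptr_t object_address,
                unsigned card_shift /* stand-in for CardTable::kCardShift */) {
    uint8_t* card = biased_card_table_base + (object_address >> card_shift);
    *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table_base));
  }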
- blocked_fpu_registers_[FTMP2] = true; - } - - // Reserve suspend and thread registers. - blocked_core_registers_[S0] = true; - blocked_core_registers_[TR] = true; - - // Reserve T9 for function calls - blocked_core_registers_[T9] = true; - - // Reserve odd-numbered FPU registers. - for (size_t i = 1; i < kNumberOfFRegisters; i += 2) { - blocked_fpu_registers_[i] = true; - } - - if (GetGraph()->IsDebuggable()) { - // Stubs do not save callee-save floating point registers. If the graph - // is debuggable, we need to deal with these registers differently. For - // now, just block them. - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } -} - -size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { - __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index); - return kMipsWordSize; -} - -size_t CodeGeneratorMIPS::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { - __ LoadFromOffset(kLoadWord, Register(reg_id), SP, stack_index); - return kMipsWordSize; -} - -size_t CodeGeneratorMIPS::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - if (GetGraph()->HasSIMD()) { - __ StoreQToOffset(FRegister(reg_id), SP, stack_index); - } else { - __ StoreDToOffset(FRegister(reg_id), SP, stack_index); - } - return GetFloatingPointSpillSlotSize(); -} - -size_t CodeGeneratorMIPS::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - if (GetGraph()->HasSIMD()) { - __ LoadQFromOffset(FRegister(reg_id), SP, stack_index); - } else { - __ LoadDFromOffset(FRegister(reg_id), SP, stack_index); - } - return GetFloatingPointSpillSlotSize(); -} - -void CodeGeneratorMIPS::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << Register(reg); -} - -void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << FRegister(reg); -} - -const MipsInstructionSetFeatures& CodeGeneratorMIPS::GetInstructionSetFeatures() const { - return *GetCompilerOptions().GetInstructionSetFeatures()->AsMipsInstructionSetFeatures(); -} - -constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16; - -void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(entrypoint, instruction, slow_path); - GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(), - IsDirectEntrypoint(entrypoint)); - if (EntrypointRequiresStackMap(entrypoint)) { - RecordPcInfo(instruction, dex_pc, slow_path); - } -} - -void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, - HInstruction* instruction, - SlowPathCode* slow_path, - bool direct) { - ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); - GenerateInvokeRuntime(entry_point_offset, direct); -} - -void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) { - bool reordering = __ SetReorder(false); - __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); - __ Jalr(T9); - if (direct) { - // Reserve argument space on stack (for $a0-$a3) for - // entrypoints that directly reference native implementations. - // Called function may use this space to store $a0-$a3 regs. - __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset); // Single instruction in delay slot. - __ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset); - } else { - __ Nop(); // In delay slot. 
- } - __ SetReorder(reordering); -} - -void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, - Register class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); - - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ Sltiu(TMP, TMP, shifted_initialized_value); - __ Bnez(TMP, slow_path->GetEntryLabel()); - // Even if the initialized flag is set, we need to ensure consistent memory ordering. - __ Sync(0); - __ Bind(slow_path->GetExitLabel()); -} - -void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - Register temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ LoadFromOffset( - kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - if (IsUint<16>(path_to_root)) { - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - __ LoadConst32(TMP, path_to_root); - __ Xor(temp, temp, TMP); - } - // Shift out bits that do not contribute to the comparison. - __ Sll(temp, temp, 32 - mask_bits); - } -} - -void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { - __ Sync(0); // Only stype 0 is supported. -} - -void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instruction, - HBasicBlock* successor) { - SuspendCheckSlowPathMIPS* slow_path = - down_cast<SuspendCheckSlowPathMIPS*>(instruction->GetSlowPath()); - - if (slow_path == nullptr) { - slow_path = - new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathMIPS(instruction, successor); - instruction->SetSlowPath(slow_path); - codegen_->AddSlowPath(slow_path); - if (successor != nullptr) { - DCHECK(successor->IsLoopHeader()); - } - } else { - DCHECK_EQ(slow_path->GetSuccessor(), successor); - } - - __ LoadFromOffset(kLoadUnsignedHalfword, - TMP, - TR, - Thread::ThreadFlagsOffset<kMipsPointerSize>().Int32Value()); - if (successor == nullptr) { - __ Bnez(TMP, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetReturnLabel()); - } else { - __ Beqz(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); - // slow_path will return to GetLabelOf(successor). 
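GenerateBitstringTypeCheckCompare above XORs the candidate's status word with the expected path-to-root and then shifts out the bits not covered by the mask, so a zero result means the bitstring matched. The same comparison expressed as plain integer code, offered as a sketch rather than the ART implementation:

  #include <cassert>
  #include <cstdint>

  // Illustrative sketch of the bitstring subtype check: the check passes when
  // the low `mask_bits` of the status word equal the expected path-to-root.
  bool BitstringMatches(uint32_t status_word, uint32_t path_to_root, uint32_t mask_bits) {
    assert(mask_bits >= 1 && mask_bits <= 32);
    uint32_t diff = status_word ^ path_to_root;
    // Shift out the bits that do not take part in the comparison; zero means
    // every compared bit matched.
    return (diff << (32u - mask_bits)) == 0u;
  }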
- } -} - -InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph, - CodeGeneratorMIPS* codegen) - : InstructionCodeGenerator(graph, codegen), - assembler_(codegen->GetAssembler()), - codegen_(codegen) {} - -void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) { - DCHECK_EQ(instruction->InputCount(), 2U); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - DataType::Type type = instruction->GetResultType(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - switch (type) { - case DataType::Type::kInt32: { - locations->SetInAt(0, Location::RequiresRegister()); - HInstruction* right = instruction->InputAt(1); - bool can_use_imm = false; - if (right->IsConstant()) { - int32_t imm = CodeGenerator::GetInt32ValueOf(right->AsConstant()); - if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { - can_use_imm = IsUint<16>(imm); - } else { - DCHECK(instruction->IsSub() || instruction->IsAdd()); - if (instruction->IsSub()) { - imm = -imm; - } - if (isR6) { - bool single_use = right->GetUses().HasExactlyOneElement(); - int16_t imm_high = High16Bits(imm); - int16_t imm_low = Low16Bits(imm); - if (imm_low < 0) { - imm_high += 1; - } - can_use_imm = !((imm_high != 0) && (imm_low != 0)) || single_use; - } else { - can_use_imm = IsInt<16>(imm); - } - } - } - if (can_use_imm) - locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); - else - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - } - - case DataType::Type::kInt64: { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - } - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK(instruction->IsAdd() || instruction->IsSub()); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; - } -} - -void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - switch (type) { - case DataType::Type::kInt32: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Location rhs_location = locations->InAt(1); - - Register rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<Register>(); - } - - if (instruction->IsAnd()) { - if (use_imm) - __ Andi(dst, lhs, rhs_imm); - else - __ And(dst, lhs, rhs_reg); - } else if (instruction->IsOr()) { - if (use_imm) - __ Ori(dst, lhs, rhs_imm); - else - __ Or(dst, lhs, rhs_reg); - } else if (instruction->IsXor()) { - if (use_imm) - __ Xori(dst, lhs, rhs_imm); - else - __ Xor(dst, lhs, rhs_reg); - } else { - DCHECK(instruction->IsAdd() || instruction->IsSub()); - if (use_imm) { - if (instruction->IsSub()) { - rhs_imm = -rhs_imm; - } - if (IsInt<16>(rhs_imm)) { - __ 
Addiu(dst, lhs, rhs_imm); - } else { - DCHECK(isR6); - int16_t rhs_imm_high = High16Bits(rhs_imm); - int16_t rhs_imm_low = Low16Bits(rhs_imm); - if (rhs_imm_low < 0) { - rhs_imm_high += 1; - } - __ Aui(dst, lhs, rhs_imm_high); - if (rhs_imm_low != 0) { - __ Addiu(dst, dst, rhs_imm_low); - } - } - } else if (instruction->IsAdd()) { - __ Addu(dst, lhs, rhs_reg); - } else { - DCHECK(instruction->IsSub()); - __ Subu(dst, lhs, rhs_reg); - } - } - break; - } - - case DataType::Type::kInt64: { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - Location rhs_location = locations->InAt(1); - bool use_imm = rhs_location.IsConstant(); - if (!use_imm) { - Register rhs_high = rhs_location.AsRegisterPairHigh<Register>(); - Register rhs_low = rhs_location.AsRegisterPairLow<Register>(); - if (instruction->IsAnd()) { - __ And(dst_low, lhs_low, rhs_low); - __ And(dst_high, lhs_high, rhs_high); - } else if (instruction->IsOr()) { - __ Or(dst_low, lhs_low, rhs_low); - __ Or(dst_high, lhs_high, rhs_high); - } else if (instruction->IsXor()) { - __ Xor(dst_low, lhs_low, rhs_low); - __ Xor(dst_high, lhs_high, rhs_high); - } else if (instruction->IsAdd()) { - if (lhs_low == rhs_low) { - // Special case for lhs = rhs and the sum potentially overwriting both lhs and rhs. - __ Slt(TMP, lhs_low, ZERO); - __ Addu(dst_low, lhs_low, rhs_low); - } else { - __ Addu(dst_low, lhs_low, rhs_low); - // If the sum overwrites rhs, lhs remains unchanged, otherwise rhs remains unchanged. - __ Sltu(TMP, dst_low, (dst_low == rhs_low) ? lhs_low : rhs_low); - } - __ Addu(dst_high, lhs_high, rhs_high); - __ Addu(dst_high, dst_high, TMP); - } else { - DCHECK(instruction->IsSub()); - __ Sltu(TMP, lhs_low, rhs_low); - __ Subu(dst_low, lhs_low, rhs_low); - __ Subu(dst_high, lhs_high, rhs_high); - __ Subu(dst_high, dst_high, TMP); - } - } else { - int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); - if (instruction->IsOr()) { - uint32_t low = Low32Bits(value); - uint32_t high = High32Bits(value); - if (IsUint<16>(low)) { - if (dst_low != lhs_low || low != 0) { - __ Ori(dst_low, lhs_low, low); - } - } else { - __ LoadConst32(TMP, low); - __ Or(dst_low, lhs_low, TMP); - } - if (IsUint<16>(high)) { - if (dst_high != lhs_high || high != 0) { - __ Ori(dst_high, lhs_high, high); - } - } else { - if (high != low) { - __ LoadConst32(TMP, high); - } - __ Or(dst_high, lhs_high, TMP); - } - } else if (instruction->IsXor()) { - uint32_t low = Low32Bits(value); - uint32_t high = High32Bits(value); - if (IsUint<16>(low)) { - if (dst_low != lhs_low || low != 0) { - __ Xori(dst_low, lhs_low, low); - } - } else { - __ LoadConst32(TMP, low); - __ Xor(dst_low, lhs_low, TMP); - } - if (IsUint<16>(high)) { - if (dst_high != lhs_high || high != 0) { - __ Xori(dst_high, lhs_high, high); - } - } else { - if (high != low) { - __ LoadConst32(TMP, high); - } - __ Xor(dst_high, lhs_high, TMP); - } - } else if (instruction->IsAnd()) { - uint32_t low = Low32Bits(value); - uint32_t high = High32Bits(value); - if (IsUint<16>(low)) { - __ Andi(dst_low, lhs_low, low); - } else if (low != 0xFFFFFFFF) { - __ LoadConst32(TMP, low); - __ And(dst_low, lhs_low, TMP); - } else if (dst_low != lhs_low) { - __ Move(dst_low, lhs_low); - } - if (IsUint<16>(high)) { - __ Andi(dst_high, lhs_high, high); - } else if (high 
!= 0xFFFFFFFF) { - if (high != low) { - __ LoadConst32(TMP, high); - } - __ And(dst_high, lhs_high, TMP); - } else if (dst_high != lhs_high) { - __ Move(dst_high, lhs_high); - } - } else { - if (instruction->IsSub()) { - value = -value; - } else { - DCHECK(instruction->IsAdd()); - } - int32_t low = Low32Bits(value); - int32_t high = High32Bits(value); - if (IsInt<16>(low)) { - if (dst_low != lhs_low || low != 0) { - __ Addiu(dst_low, lhs_low, low); - } - if (low != 0) { - __ Sltiu(AT, dst_low, low); - } - } else { - __ LoadConst32(TMP, low); - __ Addu(dst_low, lhs_low, TMP); - __ Sltu(AT, dst_low, TMP); - } - if (IsInt<16>(high)) { - if (dst_high != lhs_high || high != 0) { - __ Addiu(dst_high, lhs_high, high); - } - } else { - if (high != low) { - __ LoadConst32(TMP, high); - } - __ Addu(dst_high, lhs_high, TMP); - } - if (low != 0) { - __ Addu(dst_high, dst_high, AT); - } - } - } - break; - } - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - if (instruction->IsAdd()) { - if (type == DataType::Type::kFloat32) { - __ AddS(dst, lhs, rhs); - } else { - __ AddD(dst, lhs, rhs); - } - } else { - DCHECK(instruction->IsSub()); - if (type == DataType::Type::kFloat32) { - __ SubS(dst, lhs, rhs); - } else { - __ SubD(dst, lhs, rhs); - } - } - break; - } - - default: - LOG(FATAL) << "Unexpected binary operation type " << type; - } -} - -void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - DataType::Type type = instr->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); - break; - default: - LOG(FATAL) << "Unexpected shift type " << type; - } -} - -static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte; - -void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - LocationSummary* locations = instr->GetLocations(); - DataType::Type type = instr->GetType(); - - Location rhs_location = locations->InAt(1); - bool use_imm = rhs_location.IsConstant(); - Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); - int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - const uint32_t shift_mask = - (type == DataType::Type::kInt32) ? kMaxIntShiftDistance : kMaxLongShiftDistance; - const uint32_t shift_value = rhs_imm & shift_mask; - // Are the INS (Insert Bit Field) and ROTR instructions supported? 
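HandleBinaryOp above implements 64-bit addition on 32-bit register pairs by recovering the carry out of the low word with an unsigned set-less-than (SLTU) on the low result. The same pattern in portable form, as a sketch:

  #include <cstdint>

  // Illustrative sketch of add-with-carry on a 32-bit register file: unsigned
  // overflow of the low word happened exactly when the low result is smaller
  // than one of its operands, which is what SLTU computes.
  void Add64(uint32_t lhs_lo, uint32_t lhs_hi,
             uint32_t rhs_lo, uint32_t rhs_hi,
             uint32_t* dst_lo, uint32_t* dst_hi) {
    uint32_t lo = lhs_lo + rhs_lo;              // Addu dst_low, lhs_low, rhs_low
    uint32_t carry = (lo < rhs_lo) ? 1u : 0u;   // Sltu TMP, dst_low, rhs_low
    *dst_lo = lo;
    *dst_hi = lhs_hi + rhs_hi + carry;          // Addu dst_high, ...; Addu dst_high, dst_high, TMP
  }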
- bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); - - switch (type) { - case DataType::Type::kInt32: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - if (use_imm) { - if (shift_value == 0) { - if (dst != lhs) { - __ Move(dst, lhs); - } - } else if (instr->IsShl()) { - __ Sll(dst, lhs, shift_value); - } else if (instr->IsShr()) { - __ Sra(dst, lhs, shift_value); - } else if (instr->IsUShr()) { - __ Srl(dst, lhs, shift_value); - } else { - if (has_ins_rotr) { - __ Rotr(dst, lhs, shift_value); - } else { - __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask); - __ Srl(dst, lhs, shift_value); - __ Or(dst, dst, TMP); - } - } - } else { - if (instr->IsShl()) { - __ Sllv(dst, lhs, rhs_reg); - } else if (instr->IsShr()) { - __ Srav(dst, lhs, rhs_reg); - } else if (instr->IsUShr()) { - __ Srlv(dst, lhs, rhs_reg); - } else { - if (has_ins_rotr) { - __ Rotrv(dst, lhs, rhs_reg); - } else { - __ Subu(TMP, ZERO, rhs_reg); - // 32-bit shift instructions use the 5 least significant bits of the shift count, so - // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case - // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out - // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, - // IOW, the OR'd values are equal. - __ Sllv(TMP, lhs, TMP); - __ Srlv(dst, lhs, rhs_reg); - __ Or(dst, dst, TMP); - } - } - } - break; - } - - case DataType::Type::kInt64: { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - if (use_imm) { - if (shift_value == 0) { - codegen_->MoveLocation(locations->Out(), locations->InAt(0), type); - } else if (shift_value < kMipsBitsPerWord) { - if (has_ins_rotr) { - if (instr->IsShl()) { - __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value); - __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value); - __ Sll(dst_low, lhs_low, shift_value); - } else if (instr->IsShr()) { - __ Srl(dst_low, lhs_low, shift_value); - __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); - __ Sra(dst_high, lhs_high, shift_value); - } else if (instr->IsUShr()) { - __ Srl(dst_low, lhs_low, shift_value); - __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); - __ Srl(dst_high, lhs_high, shift_value); - } else { - __ Srl(dst_low, lhs_low, shift_value); - __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); - __ Srl(dst_high, lhs_high, shift_value); - __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value); - } - } else { - if (instr->IsShl()) { - __ Sll(dst_low, lhs_low, shift_value); - __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value); - __ Sll(dst_high, lhs_high, shift_value); - __ Or(dst_high, dst_high, TMP); - } else if (instr->IsShr()) { - __ Sra(dst_high, lhs_high, shift_value); - __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); - __ Srl(dst_low, lhs_low, shift_value); - __ Or(dst_low, dst_low, TMP); - } else if (instr->IsUShr()) { - __ Srl(dst_high, lhs_high, shift_value); - __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); - __ Srl(dst_low, lhs_low, shift_value); - __ Or(dst_low, dst_low, TMP); - } else { - __ Srl(TMP, lhs_low, shift_value); - __ Sll(dst_low, 
lhs_high, kMipsBitsPerWord - shift_value); - __ Or(dst_low, dst_low, TMP); - __ Srl(TMP, lhs_high, shift_value); - __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value); - __ Or(dst_high, dst_high, TMP); - } - } - } else { - const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; - if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value_high); - __ Move(dst_low, ZERO); - } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value_high); - __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); - } else if (instr->IsUShr()) { - __ Srl(dst_low, lhs_high, shift_value_high); - __ Move(dst_high, ZERO); - } else { - if (shift_value == kMipsBitsPerWord) { - // 64-bit rotation by 32 is just a swap. - __ Move(dst_low, lhs_high); - __ Move(dst_high, lhs_low); - } else { - if (has_ins_rotr) { - __ Srl(dst_low, lhs_high, shift_value_high); - __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); - __ Srl(dst_high, lhs_low, shift_value_high); - __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); - } else { - __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); - __ Srl(dst_low, lhs_high, shift_value_high); - __ Or(dst_low, dst_low, TMP); - __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); - __ Srl(dst_high, lhs_low, shift_value_high); - __ Or(dst_high, dst_high, TMP); - } - } - } - } - } else { - const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - MipsLabel done; - if (instr->IsShl()) { - __ Sllv(dst_low, lhs_low, rhs_reg); - __ Nor(AT, ZERO, rhs_reg); - __ Srl(TMP, lhs_low, 1); - __ Srlv(TMP, TMP, AT); - __ Sllv(dst_high, lhs_high, rhs_reg); - __ Or(dst_high, dst_high, TMP); - __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - if (isR6) { - __ Beqzc(TMP, &done, /* is_bare= */ true); - __ Move(dst_high, dst_low); - __ Move(dst_low, ZERO); - } else { - __ Movn(dst_high, dst_low, TMP); - __ Movn(dst_low, ZERO, TMP); - } - } else if (instr->IsShr()) { - __ Srav(dst_high, lhs_high, rhs_reg); - __ Nor(AT, ZERO, rhs_reg); - __ Sll(TMP, lhs_high, 1); - __ Sllv(TMP, TMP, AT); - __ Srlv(dst_low, lhs_low, rhs_reg); - __ Or(dst_low, dst_low, TMP); - __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - if (isR6) { - __ Beqzc(TMP, &done, /* is_bare= */ true); - __ Move(dst_low, dst_high); - __ Sra(dst_high, dst_high, 31); - } else { - __ Sra(AT, dst_high, 31); - __ Movn(dst_low, dst_high, TMP); - __ Movn(dst_high, AT, TMP); - } - } else if (instr->IsUShr()) { - __ Srlv(dst_high, lhs_high, rhs_reg); - __ Nor(AT, ZERO, rhs_reg); - __ Sll(TMP, lhs_high, 1); - __ Sllv(TMP, TMP, AT); - __ Srlv(dst_low, lhs_low, rhs_reg); - __ Or(dst_low, dst_low, TMP); - __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - if (isR6) { - __ Beqzc(TMP, &done, /* is_bare= */ true); - __ Move(dst_low, dst_high); - __ Move(dst_high, ZERO); - } else { - __ Movn(dst_low, dst_high, TMP); - __ Movn(dst_high, ZERO, TMP); - } - } else { // Rotate. 
- __ Nor(AT, ZERO, rhs_reg); - __ Srlv(TMP, lhs_low, rhs_reg); - __ Sll(dst_low, lhs_high, 1); - __ Sllv(dst_low, dst_low, AT); - __ Or(dst_low, dst_low, TMP); - __ Srlv(TMP, lhs_high, rhs_reg); - __ Sll(dst_high, lhs_low, 1); - __ Sllv(dst_high, dst_high, AT); - __ Or(dst_high, dst_high, TMP); - __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - if (isR6) { - __ Beqzc(TMP, &done, /* is_bare= */ true); - __ Move(TMP, dst_high); - __ Move(dst_high, dst_low); - __ Move(dst_low, TMP); - } else { - __ Movn(AT, dst_high, TMP); - __ Movn(dst_high, dst_low, TMP); - __ Movn(dst_low, AT, TMP); - } - } - __ Bind(&done); - } - break; - } - - default: - LOG(FATAL) << "Unexpected shift operation type " << type; - } -} - -void LocationsBuilderMIPS::VisitAdd(HAdd* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS::VisitAdd(HAdd* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS::VisitAnd(HAnd* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { - DataType::Type type = instruction->GetType(); - bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (type == DataType::Type::kReference); - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, - object_array_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (DataType::IsFloatingPointType(type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - // The output overlaps in the case of an object array get with - // read barriers enabled: we do not want the move to overwrite the - // array's location, as we need it to emit the read barrier. - locations->SetOut(Location::RequiresRegister(), - object_array_get_with_read_barrier - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - bool temp_needed = instruction->GetIndex()->IsConstant() - ? 
!kBakerReadBarrierThunksEnableForFields - : !kBakerReadBarrierThunksEnableForArrays; - if (temp_needed) { - locations->AddTemp(Location::RequiresRegister()); - } - } -} - -static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) { - auto null_checker = [codegen, instruction]() { - codegen->MaybeRecordImplicitNullCheck(instruction); - }; - return null_checker; -} - -void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - Register obj = obj_loc.AsRegister<Register>(); - Location out_loc = locations->Out(); - Location index = locations->InAt(1); - uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - DataType::Type type = instruction->GetType(); - const bool maybe_compressed_char_at = mirror::kUseStringCompression && - instruction->IsStringCharAt(); - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); - } else { - __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt8: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); - } else { - __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint16: { - Register out = out_loc.AsRegister<Register>(); - if (maybe_compressed_char_at) { - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); - __ Sll(TMP, TMP, 31); // Extract compression flag into the most significant bit of TMP. 
- static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - } - if (index.IsConstant()) { - int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); - if (maybe_compressed_char_at) { - MipsLabel uncompressed_load, done; - __ Bnez(TMP, &uncompressed_load); - __ LoadFromOffset(kLoadUnsignedByte, - out, - obj, - data_offset + (const_index << TIMES_1)); - __ B(&done); - __ Bind(&uncompressed_load); - __ LoadFromOffset(kLoadUnsignedHalfword, - out, - obj, - data_offset + (const_index << TIMES_2)); - __ Bind(&done); - } else { - __ LoadFromOffset(kLoadUnsignedHalfword, - out, - obj, - data_offset + (const_index << TIMES_2), - null_checker); - } - } else { - Register index_reg = index.AsRegister<Register>(); - if (maybe_compressed_char_at) { - MipsLabel uncompressed_load, done; - __ Bnez(TMP, &uncompressed_load); - __ Addu(TMP, obj, index_reg); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); - __ B(&done); - __ Bind(&uncompressed_load); - __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); - __ Bind(&done); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index_reg, obj); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); - } - } - break; - } - - case DataType::Type::kInt16: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index.AsRegister<Register>(), obj); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt32: { - DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index.AsRegister<Register>(), obj); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kReference: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ out = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - bool temp_needed = index.IsConstant() - ? !kBakerReadBarrierThunksEnableForFields - : !kBakerReadBarrierThunksEnableForArrays; - Location temp = temp_needed ? 
locations->GetTemp(0) : Location::NoLocation(); - // Note that a potential implicit null check is handled in this - // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call. - DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); - if (index.IsConstant()) { - // Array load with a constant index can be treated as a field load. - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - offset, - temp, - /* needs_null_check= */ false); - } else { - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check= */ false); - } - } else { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). - codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). - codegen_->MaybeGenerateReadBarrierSlow(instruction, - out_loc, - out_loc, - obj_loc, - data_offset, - index); - } - } - break; - } - - case DataType::Type::kInt64: { - Register out = out_loc.AsRegisterPairLow<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index.AsRegister<Register>(), obj); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat32: { - FRegister out = out_loc.AsFpuRegister<FRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out, obj, offset, null_checker); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index.AsRegister<Register>(), obj); - __ LoadSFromOffset(out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP); - __ LoadSFromOffset(out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat64: { - FRegister out = out_loc.AsFpuRegister<FRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(out, obj, offset, null_checker); - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(TMP, index.AsRegister<Register>(), obj); - __ LoadDFromOffset(out, TMP, data_offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP); - __ 
LoadDFromOffset(out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = instruction->GetLocations(); - uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // Mask out compression flag from String's array length. - if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ Srl(out, out, 1u); - } -} - -Location LocationsBuilderMIPS::RegisterOrZeroConstant(HInstruction* instruction) { - return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern()) - ? Location::ConstantLocation(instruction->AsConstant()) - : Location::RequiresRegister(); -} - -Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instruction) { - // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register. - // We can store a non-zero float or double constant without first loading it into the FPU, - // but we should only prefer this if the constant has a single use. - if (instruction->IsConstant() && - (instruction->AsConstant()->IsZeroBitPattern() || - instruction->GetUses().HasExactlyOneElement())) { - return Location::ConstantLocation(instruction->AsConstant()); - // Otherwise fall through and require an FPU register for the constant. - } - return Location::RequiresFpuRegister(); -} - -void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { - DataType::Type value_type = instruction->GetComponentType(); - - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); - - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (DataType::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } - if (needs_write_barrier) { - // Temporary register for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
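// Illustrative sketch (standalone C++, not ART code): the Srl-by-1 in the
// deleted VisitArrayLength and the flag test in the compressed charAt path
// above decode the same packed String count word: bit 0 is the compression
// flag (0 = compressed, per the static_assert above) and the remaining bits
// are the length. Field and helper names here are made up.
#include <cassert>
#include <cstdint>

static uint32_t DecodeLength(uint32_t count) { return count >> 1; }
static bool IsCompressed(uint32_t count) { return (count & 1u) == 0u; }

// charAt picks an 8-bit or a 16-bit element load depending on the flag,
// which is what the Bnez(TMP, &uncompressed_load) split above selects.
static uint16_t CharAt(uint32_t count, const uint8_t* bytes,
                       const uint16_t* chars, uint32_t index) {
  assert(index < DecodeLength(count));
  return IsCompressed(count) ? bytes[index] : chars[index];
}

int main() {
  const uint8_t latin1[] = {'h', 'i'};
  const uint16_t utf16[] = {0x4F60, 0x597D};
  assert(CharAt((2u << 1) | 0u, latin1, nullptr, 1) == 'i');    // compressed
  assert(CharAt((2u << 1) | 1u, nullptr, utf16, 0) == 0x4F60);  // uncompressed
  return 0;
}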
- } -} - -void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - Location index = locations->InAt(1); - Location value_location = locations->InAt(2); - DataType::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - Register base_reg = index.IsConstant() ? obj : TMP; - - switch (value_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; - } else { - __ Addu(base_reg, obj, index.AsRegister<Register>()); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker); - } else { - Register value = value_location.AsRegister<Register>(); - __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint16: - case DataType::Type::kInt16: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_2, base_reg); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker); - } else { - Register value = value_location.AsRegister<Register>(); - __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt32: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - } else { - Register value = value_location.AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kReference: { - if (value_location.IsConstant()) { - // Just setting null. 
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); - } - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - DCHECK_EQ(value, 0); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call_for_type_check); - break; - } - - DCHECK(needs_write_barrier); - Register value = value_location.AsRegister<Register>(); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = TMP; // Doesn't need to survive slow path. - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - MipsLabel done; - SlowPathCodeMIPS* slow_path = nullptr; - - if (may_need_runtime_call_for_type_check) { - slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - MipsLabel non_zero; - __ Bnez(value, &non_zero); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); - } - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - __ B(&done); - __ Bind(&non_zero); - } - - // Note that when read barriers are enabled, the type checks - // are performed without read barriers. This is fine, even in - // the case where a class object is in the from-space after - // the flip, as a comparison involving such a type would not - // produce a false positive; it may of course produce a false - // negative, in which case we would take the ArraySet slow - // path. - - // /* HeapReference<Class> */ temp1 = obj->klass_ - __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker); - __ MaybeUnpoisonHeapReference(temp1); - - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // /* HeapReference<Class> */ temp2 = value->klass_ - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // If heap poisoning is enabled, no need to unpoison `temp1` - // nor `temp2`, as we are comparing two poisoned references. - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - MipsLabel do_put; - __ Beq(temp1, temp2, &do_put); - // If heap poisoning is enabled, the `temp1` reference has - // not been unpoisoned yet; unpoison it now. - __ MaybeUnpoisonHeapReference(temp1); - - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // If heap poisoning is enabled, no need to unpoison - // `temp1`, as we are comparing against null below. 
- __ Bnez(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ Bne(temp1, temp2, slow_path->GetEntryLabel()); - } - } - - Register source = value; - if (kPoisonHeapReferences) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - __ Move(temp1, value); - __ PoisonHeapReference(temp1); - source = temp1; - } - - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); - } - __ StoreToOffset(kStoreWord, source, base_reg, data_offset); - - if (!may_need_runtime_call_for_type_check) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); - - if (done.IsLinked()) { - __ Bind(&done); - } - - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } - break; - } - - case DataType::Type::kInt64: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg); - } - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); - } else { - Register value = value_location.AsRegisterPairLow<Register>(); - __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat32: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - } else { - FRegister value = value_location.AsFpuRegister<FRegister>(); - __ StoreSToOffset(value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat64: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; - } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { - __ Addu(base_reg, index.AsRegister<Register>(), obj); - } else { - __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg); - } - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); - } else { - FRegister value = value_location.AsFpuRegister<FRegister>(); - __ StoreDToOffset(value, base_reg, data_offset, null_checker); - } - break; - } 
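// Illustrative sketch (standalone C++, not ART code): the reference ArraySet
// path deleted above checks at the store site that the value's class matches
// the array's component type; when the static type is Object[], a mismatch is
// still fine if the component type is java.lang.Object, which the code detects
// by its null super class instead of loading Object's class. Null stores skip
// the check entirely. The Class/Object structs below are made up.
#include <cassert>
#include <cstddef>

struct Class { const Class* super_class; };
struct Object { const Class* klass; };

// True when the store may take the fast path; false means the ArraySet slow
// path (the full runtime aastore check) would run instead.
static bool FastPathStoreOk(const Class* component_type, const Object* value,
                            bool static_type_is_object_array) {
  if (value == nullptr) return true;                // "Just setting null."
  if (value->klass == component_type) return true;  // Exact match.
  return static_type_is_object_array &&
         component_type->super_class == nullptr;    // Component is j.l.Object.
}

int main() {
  Class object_class{nullptr};
  Class string_class{&object_class};
  Class integer_class{&object_class};
  Object a_string{&string_class};
  Object an_integer{&integer_class};
  assert(FastPathStoreOk(&string_class, &a_string, true));     // String into String[]
  assert(!FastPathStoreOk(&string_class, &an_integer, true));  // Integer into String[]
  assert(FastPathStoreOk(&object_class, &an_integer, true));   // anything into Object[]
  assert(FastPathStoreOk(&string_class, nullptr, true));       // null store
  return 0;
}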
- - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitIntermediateArrayAddressIndex( - HIntermediateArrayAddressIndex* instruction) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - - HIntConstant* shift = instruction->GetShift()->AsIntConstant(); - - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(shift)); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS::VisitIntermediateArrayAddressIndex( - HIntermediateArrayAddressIndex* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register index_reg = locations->InAt(0).AsRegister<Register>(); - uint32_t shift = instruction->GetShift()->AsIntConstant()->GetValue(); - __ Sll(locations->Out().AsRegister<Register>(), index_reg, shift); -} - -void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - - HInstruction* index = instruction->InputAt(0); - HInstruction* length = instruction->InputAt(1); - - bool const_index = false; - bool const_length = false; - - if (index->IsConstant()) { - if (length->IsConstant()) { - const_index = true; - const_length = true; - } else { - int32_t index_value = index->AsIntConstant()->GetValue(); - if (index_value < 0 || IsInt<16>(index_value + 1)) { - const_index = true; - } - } - } else if (length->IsConstant()) { - int32_t length_value = length->AsIntConstant()->GetValue(); - if (IsUint<15>(length_value)) { - const_length = true; - } - } - - locations->SetInAt(0, const_index - ? Location::ConstantLocation(index->AsConstant()) - : Location::RequiresRegister()); - locations->SetInAt(1, const_length - ? Location::ConstantLocation(length->AsConstant()) - : Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Location index_loc = locations->InAt(0); - Location length_loc = locations->InAt(1); - - if (length_loc.IsConstant()) { - int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index_loc.IsConstant()) { - int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index < 0 || index >= length) { - BoundsCheckSlowPathMIPS* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); - } else { - // Nothing to be done. 
- } - return; - } - - BoundsCheckSlowPathMIPS* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - Register index = index_loc.AsRegister<Register>(); - if (length == 0) { - __ B(slow_path->GetEntryLabel()); - } else if (length == 1) { - __ Bnez(index, slow_path->GetEntryLabel()); - } else { - DCHECK(IsUint<15>(length)) << length; - __ Sltiu(TMP, index, length); - __ Beqz(TMP, slow_path->GetEntryLabel()); - } - } else { - Register length = length_loc.AsRegister<Register>(); - BoundsCheckSlowPathMIPS* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - if (index_loc.IsConstant()) { - int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index < 0) { - __ B(slow_path->GetEntryLabel()); - } else if (index == 0) { - __ Blez(length, slow_path->GetEntryLabel()); - } else { - DCHECK(IsInt<16>(index + 1)) << index; - __ Sltiu(TMP, length, index + 1); - __ Bnez(TMP, slow_path->GetEntryLabel()); - } - } else { - Register index = index_loc.AsRegister<Register>(); - __ Bgeu(index, length, slow_path->GetEntryLabel()); - } - } -} - -// Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && - !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { - return 1; - } - return 0; -} - -// Extra temp is used for read barrier. -static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { - return 1 + NumberOfInstanceOfTemps(type_check_kind); -} - -void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); -} - -void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - Register obj = obj_loc.AsRegister<Register>(); - Location cls = locations->InAt(1); - Location temp_loc = locations->GetTemp(0); - Register temp = temp_loc.AsRegister<Register>(); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); - DCHECK_LE(num_temps, 2u); - Location maybe_temp2_loc = (num_temps >= 2) ? 
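// Illustrative sketch (standalone C++, not ART code): the deleted BoundsCheck
// code relies on a single unsigned comparison (Bgeu index, length, or
// Sltiu against a constant length) to reject both negative and too-large
// indices at once, since a negative int32 reinterpreted as uint32 is always
// at least as large as any valid array length.
#include <cassert>
#include <cstdint>

static bool InBounds(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) < static_cast<uint32_t>(length);
}

int main() {
  assert(InBounds(0, 4));
  assert(InBounds(3, 4));
  assert(!InBounds(4, 4));
  assert(!InBounds(-1, 4));  // 0xFFFFFFFF as unsigned, never below length.
  assert(!InBounds(0, 0));   // Empty array rejects everything.
  return 0;
}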
locations->GetTemp(1) : Location::NoLocation(); - const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); - const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); - const uint32_t object_array_data_offset = - mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - MipsLabel done; - - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); - SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, is_type_check_slow_path_fatal); - codegen_->AddSlowPath(slow_path); - - // Avoid this check if we know `obj` is not null. - if (instruction->MustDoNullCheck()) { - __ Beqz(obj, &done); - } - - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kArrayCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Jump to slow path for throwing the exception or doing a - // more involved array check. - __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kAbstractClassCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class is abstract, we eagerly fetch the super class of the - // object to avoid doing a comparison we know will fail. - MipsLabel loop; - __ Bind(&loop); - // /* HeapReference<Class> */ temp = temp->super_class_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - super_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class reference currently in `temp` is null, jump to the slow path to throw the - // exception. - __ Beqz(temp, slow_path->GetEntryLabel()); - // Otherwise, compare the classes. - __ Bne(temp, cls.AsRegister<Register>(), &loop); - break; - } - - case TypeCheckKind::kClassHierarchyCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Walk over the class hierarchy to find a match. - MipsLabel loop; - __ Bind(&loop); - __ Beq(temp, cls.AsRegister<Register>(), &done); - // /* HeapReference<Class> */ temp = temp->super_class_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - super_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class reference currently in `temp` is null, jump to the slow path to throw the - // exception. Otherwise, jump to the beginning of the loop. - __ Bnez(temp, &loop); - __ B(slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kArrayObjectCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Do an exact check. - __ Beq(temp, cls.AsRegister<Register>(), &done); - // Otherwise, we need to check that the object's class is a non-primitive array. 
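// Illustrative sketch (standalone C++, not ART code): the kAbstractClassCheck
// and kClassHierarchyCheck loops deleted above resolve a cast by walking the
// super-class chain until they either hit the target class (cast succeeds) or
// run off the end with a null super class (slow path throws). The struct
// layout is made up and read barriers are out of scope here.
#include <cassert>
#include <cstddef>

struct Class { const Class* super_class; };

static bool IsSubclassOf(const Class* klass, const Class* target) {
  for (const Class* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) return true;  // Beq(temp, cls, &done)
  }
  return false;                    // Beqz(temp, slow_path) / B(slow_path)
}

int main() {
  Class object{nullptr};
  Class abstract_list{&object};
  Class array_list{&abstract_list};
  assert(IsSubclassOf(&array_list, &abstract_list));
  assert(IsSubclassOf(&array_list, &object));
  assert(!IsSubclassOf(&abstract_list, &array_list));
  return 0;
}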
- // /* HeapReference<Class> */ temp = temp->component_type_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - component_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the component type is null, jump to the slow path to throw the exception. - __ Beqz(temp, slow_path->GetEntryLabel()); - // Otherwise, the object is indeed an array, further check that this component - // type is not a primitive type. - __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Bnez(temp, slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kUnresolvedCheck: - // We always go into the type check slow path for the unresolved check case. - // We cannot directly call the CheckCast runtime entry point - // without resorting to a type checking slow path here (i.e. by - // calling InvokeRuntime directly), as it would require to - // assign fixed registers for the inputs of this HInstanceOf - // instruction (following the runtime calling convention), which - // might be cluttered by the potential first read barrier - // emission at the beginning of this method. - __ B(slow_path->GetEntryLabel()); - break; - - case TypeCheckKind::kInterfaceCheck: { - // Avoid read barriers to improve performance of the fast path. We can not get false - // positives by doing this. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Iftable is never null. - __ Lw(TMP, temp, array_length_offset); - // Loop through the iftable and check if any class matches. - MipsLabel loop; - __ Bind(&loop); - __ Addiu(temp, temp, 2 * kHeapReferenceSize); // Possibly in delay slot on R2. - __ Beqz(TMP, slow_path->GetEntryLabel()); - __ Lw(AT, temp, object_array_data_offset - 2 * kHeapReferenceSize); - __ MaybeUnpoisonHeapReference(AT); - // Go to next interface. - __ Addiu(TMP, TMP, -2); - // Compare the classes and continue the loop if they do not match. - __ Bne(AT, cls.AsRegister<Register>(), &loop); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ Bnez(temp, slow_path->GetEntryLabel()); - break; - } - } - - __ Bind(&done); - __ Bind(slow_path->GetExitLabel()); -} - -void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); - locations->SetInAt(0, Location::RequiresRegister()); - if (check->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } - // Rely on the type initialization to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); -} - -void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { - // We assume the class is not null. 
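// Illustrative sketch (standalone C++, not ART code): the kInterfaceCheck loop
// deleted above scans the class's iftable, whose entries come in pairs of
// (interface class, method array), hence the 2 * kHeapReferenceSize stride and
// the Addiu(TMP, TMP, -2) countdown, and it succeeds on the first matching
// interface. The flat pair layout below is a made-up model of that table.
#include <cassert>
#include <vector>

struct Class {};
struct MethodArray {};
struct IfTableEntry {        // One pair in the modeled iftable.
  const Class* interface;
  const MethodArray* methods;
};

static bool ImplementsInterface(const std::vector<IfTableEntry>& iftable,
                                const Class* target) {
  for (const IfTableEntry& entry : iftable) {  // Bne(AT, cls, &loop) until match.
    if (entry.interface == target) return true;
  }
  return false;                                // Beqz(TMP, slow_path)
}

int main() {
  Class comparable, serializable, runnable;
  MethodArray m0, m1;
  std::vector<IfTableEntry> iftable = {{&comparable, &m0}, {&serializable, &m1}};
  assert(ImplementsInterface(iftable, &serializable));
  assert(!ImplementsInterface(iftable, &runnable));
  return 0;
}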
- SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(check->GetLoadClass(), check); - codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, - check->GetLocations()->InAt(0).AsRegister<Register>()); -} - -void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { - DataType::Type in_type = compare->InputAt(0)->GetType(); - - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); - - switch (in_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Output overlaps because it is written before doing the low comparison. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type for compare operation " << in_type; - } -} - -void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register res = locations->Out().AsRegister<Register>(); - DataType::Type in_type = instruction->InputAt(0)->GetType(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - // 0 if: left == right - // 1 if: left > right - // -1 if: left < right - switch (in_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: { - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - __ Slt(TMP, lhs, rhs); - __ Slt(res, rhs, lhs); - __ Subu(res, res, TMP); - break; - } - case DataType::Type::kInt64: { - MipsLabel done; - Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>(); - // TODO: more efficient (direct) comparison with a constant. - __ Slt(TMP, lhs_high, rhs_high); - __ Slt(AT, rhs_high, lhs_high); // Inverted: is actually gt. - __ Subu(res, AT, TMP); // Result -1:1:0 for [ <, >, == ]. - __ Bnez(res, &done); // If we compared ==, check if lower bits are also equal. - __ Sltu(TMP, lhs_low, rhs_low); - __ Sltu(AT, rhs_low, lhs_low); // Inverted: is actually gt. - __ Subu(res, AT, TMP); // Result -1:1:0 for [ <, >, == ]. 
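// Illustrative sketch (standalone C++, not ART code): the deleted VisitCompare
// materializes the -1/0/1 result as (rhs < lhs) - (lhs < rhs), and for longs
// compares the signed high words first, falling back to an unsigned compare of
// the low words only when the high words are equal. Two's-complement halves
// are assumed, as on the targets this code served.
#include <cassert>
#include <cstdint>

static int32_t Cmp32(int32_t lhs, int32_t rhs) {
  return static_cast<int32_t>(rhs < lhs) - static_cast<int32_t>(lhs < rhs);
}

static int32_t Cmp64FromHalves(int32_t lhs_hi, uint32_t lhs_lo,
                               int32_t rhs_hi, uint32_t rhs_lo) {
  int32_t res = Cmp32(lhs_hi, rhs_hi);  // Signed compare of the high words.
  if (res != 0) return res;             // Bnez(res, &done)
  return static_cast<int32_t>(rhs_lo < lhs_lo) -
         static_cast<int32_t>(lhs_lo < rhs_lo);  // Unsigned compare of the low words.
}

static int32_t Reference(int64_t a, int64_t b) { return (a > b) - (a < b); }

int main() {
  const int64_t samples[] = {0, 1, -1, 42, -42, INT64_MIN, INT64_MAX,
                             1LL << 32, -(1LL << 32)};
  for (int64_t a : samples) {
    for (int64_t b : samples) {
      int32_t got = Cmp64FromHalves(static_cast<int32_t>(a >> 32),
                                    static_cast<uint32_t>(a),
                                    static_cast<int32_t>(b >> 32),
                                    static_cast<uint32_t>(b));
      assert(got == Reference(a, b));
    }
  }
  return 0;
}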
- __ Bind(&done); - break; - } - - case DataType::Type::kFloat32: { - bool gt_bias = instruction->IsGtBias(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - MipsLabel done; - if (isR6) { - __ CmpEqS(FTMP, lhs, rhs); - __ LoadConst32(res, 0); - __ Bc1nez(FTMP, &done); - if (gt_bias) { - __ CmpLtS(FTMP, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, 1); - } else { - __ CmpLtS(FTMP, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, -1); - } - } else { - if (gt_bias) { - __ ColtS(0, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1t(0, &done); - __ CeqS(0, lhs, rhs); - __ LoadConst32(res, 1); - __ Movt(res, ZERO, 0); - } else { - __ ColtS(0, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1t(0, &done); - __ CeqS(0, lhs, rhs); - __ LoadConst32(res, -1); - __ Movt(res, ZERO, 0); - } - } - __ Bind(&done); - break; - } - case DataType::Type::kFloat64: { - bool gt_bias = instruction->IsGtBias(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - MipsLabel done; - if (isR6) { - __ CmpEqD(FTMP, lhs, rhs); - __ LoadConst32(res, 0); - __ Bc1nez(FTMP, &done); - if (gt_bias) { - __ CmpLtD(FTMP, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, 1); - } else { - __ CmpLtD(FTMP, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, -1); - } - } else { - if (gt_bias) { - __ ColtD(0, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1t(0, &done); - __ CeqD(0, lhs, rhs); - __ LoadConst32(res, 1); - __ Movt(res, ZERO, 0); - } else { - __ ColtD(0, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1t(0, &done); - __ CeqD(0, lhs, rhs); - __ LoadConst32(res, -1); - __ Movt(res, ZERO, 0); - } - } - __ Bind(&done); - break; - } - - default: - LOG(FATAL) << "Unimplemented compare type " << in_type; - } -} - -void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->InputAt(0)->GetType()) { - default: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - break; - } - if (!instruction->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } -} - -void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { - if (instruction->IsEmittedAtUseSite()) { - return; - } - - DataType::Type type = instruction->InputAt(0)->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - default: - // Integer case. 
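// Illustrative sketch (standalone C++, not ART code): the deleted float and
// double compare paths first test equality, then order the operands so that a
// NaN falls through to +1 under gt-bias and to -1 under lt-bias, mirroring the
// dex cmpg/cmpl distinction. A small restatement of that decision tree:
#include <cassert>
#include <cmath>

static int CompareFp(double lhs, double rhs, bool gt_bias) {
  if (lhs == rhs) return 0;        // CmpEqD / CeqD path (false for NaN).
  if (gt_bias) {
    return (lhs < rhs) ? -1 : 1;   // NaN: lhs < rhs is false, so +1.
  }
  return (rhs < lhs) ? 1 : -1;     // NaN: rhs < lhs is false, so -1.
}

int main() {
  const double nan = std::nan("");
  assert(CompareFp(1.0, 2.0, /*gt_bias=*/true) == -1);
  assert(CompareFp(2.0, 1.0, /*gt_bias=*/true) == 1);
  assert(CompareFp(1.0, 1.0, /*gt_bias=*/true) == 0);
  assert(CompareFp(nan, 1.0, /*gt_bias=*/true) == 1);    // gt-bias: NaN compares "greater".
  assert(CompareFp(nan, 1.0, /*gt_bias=*/false) == -1);  // lt-bias: NaN compares "less".
  return 0;
}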
- GenerateIntCompare(instruction->GetCondition(), locations); - return; - - case DataType::Type::kInt64: - GenerateLongCompare(instruction->GetCondition(), locations); - return; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); - return; - } -} - -void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - int64_t imm = Int64FromConstant(second.GetConstant()); - DCHECK(imm == 1 || imm == -1); - - if (instruction->GetResultType() == DataType::Type::kInt32) { - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - - if (instruction->IsRem()) { - __ Move(out, ZERO); - } else { - if (imm == -1) { - __ Subu(out, ZERO, dividend); - } else if (out != dividend) { - __ Move(out, dividend); - } - } - } else { - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); - Register out_high = locations->Out().AsRegisterPairHigh<Register>(); - Register out_low = locations->Out().AsRegisterPairLow<Register>(); - Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); - - if (instruction->IsRem()) { - __ Move(out_high, ZERO); - __ Move(out_low, ZERO); - } else { - if (imm == -1) { - __ Subu(out_low, ZERO, in_low); - __ Sltu(AT, ZERO, out_low); - __ Subu(out_high, ZERO, in_high); - __ Subu(out_high, out_high, AT); - } else { - __ Move(out_low, in_low); - __ Move(out_high, in_high); - } - } - } -} - -void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - const bool is_r2_or_newer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); - const bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - DCHECK(second.IsConstant()); - - if (instruction->GetResultType() == DataType::Type::kInt32) { - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - if (instruction->IsDiv()) { - if (ctz_imm == 1) { - // Fast path for division by +/-2, which is very common. - __ Srl(TMP, dividend, 31); - } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - } - __ Addu(out, dividend, TMP); - __ Sra(out, out, ctz_imm); - if (imm < 0) { - __ Subu(out, ZERO, out); - } - } else { - if (ctz_imm == 1) { - // Fast path for modulo +/-2, which is very common. 
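// Illustrative sketch (standalone C++, not ART code): the Sra/Srl/Addu/Sra
// sequence deleted above implements truncating signed division by +/-2^k by
// adding a bias of (2^k - 1) to negative dividends before the arithmetic
// shift; an unbiased shift alone would round toward negative infinity.
// Checked against native division; assumes the usual two's-complement
// arithmetic right shift.
#include <cassert>
#include <cstdint>

static int32_t DivByPowerOfTwo(int32_t dividend, int k, bool negative_divisor) {
  // bias == (dividend < 0) ? (1 << k) - 1 : 0, produced branchlessly, exactly
  // like the Sra(31) followed by Srl(32 - k) above.
  uint32_t bias = static_cast<uint32_t>(dividend >> 31) >> (32 - k);
  int32_t out = (dividend + static_cast<int32_t>(bias)) >> k;
  return negative_divisor ? -out : out;  // Subu(out, ZERO, out) when imm < 0.
}

int main() {
  const int32_t samples[] = {0, 1, -1, 7, -7, 100, -100, 2147483647, -2147483647};
  for (int32_t x : samples) {
    assert(DivByPowerOfTwo(x, 3, false) == x / 8);
    assert(DivByPowerOfTwo(x, 3, true) == x / -8);
    assert(DivByPowerOfTwo(x, 1, false) == x / 2);
  }
  return 0;
}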
- __ Sra(TMP, dividend, 31); - __ Subu(out, dividend, TMP); - __ Andi(out, out, 1); - __ Addu(out, out, TMP); - } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - __ Addu(out, dividend, TMP); - if (IsUint<16>(abs_imm - 1)) { - __ Andi(out, out, abs_imm - 1); - } else { - if (is_r2_or_newer) { - __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); - } else { - __ Sll(out, out, 32 - ctz_imm); - __ Srl(out, out, 32 - ctz_imm); - } - } - __ Subu(out, out, TMP); - } - } - } else { - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); - Register out_high = locations->Out().AsRegisterPairHigh<Register>(); - Register out_low = locations->Out().AsRegisterPairLow<Register>(); - Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); - int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - if (instruction->IsDiv()) { - if (ctz_imm < 32) { - if (ctz_imm == 1) { - __ Srl(AT, in_high, 31); - } else { - __ Sra(AT, in_high, 31); - __ Srl(AT, AT, 32 - ctz_imm); - } - __ Addu(AT, AT, in_low); - __ Sltu(TMP, AT, in_low); - __ Addu(out_high, in_high, TMP); - __ Srl(out_low, AT, ctz_imm); - if (is_r2_or_newer) { - __ Ins(out_low, out_high, 32 - ctz_imm, ctz_imm); - __ Sra(out_high, out_high, ctz_imm); - } else { - __ Sll(AT, out_high, 32 - ctz_imm); - __ Sra(out_high, out_high, ctz_imm); - __ Or(out_low, out_low, AT); - } - if (imm < 0) { - __ Subu(out_low, ZERO, out_low); - __ Sltu(AT, ZERO, out_low); - __ Subu(out_high, ZERO, out_high); - __ Subu(out_high, out_high, AT); - } - } else if (ctz_imm == 32) { - __ Sra(AT, in_high, 31); - __ Addu(AT, AT, in_low); - __ Sltu(AT, AT, in_low); - __ Addu(out_low, in_high, AT); - if (imm < 0) { - __ Srl(TMP, out_low, 31); - __ Subu(out_low, ZERO, out_low); - __ Sltu(AT, ZERO, out_low); - __ Subu(out_high, TMP, AT); - } else { - __ Sra(out_high, out_low, 31); - } - } else if (ctz_imm < 63) { - __ Sra(AT, in_high, 31); - __ Srl(TMP, AT, 64 - ctz_imm); - __ Addu(AT, AT, in_low); - __ Sltu(AT, AT, in_low); - __ Addu(out_low, in_high, AT); - __ Addu(out_low, out_low, TMP); - __ Sra(out_low, out_low, ctz_imm - 32); - if (imm < 0) { - __ Subu(out_low, ZERO, out_low); - } - __ Sra(out_high, out_low, 31); - } else { - DCHECK_LT(imm, 0); - if (is_r6) { - __ Aui(AT, in_high, 0x8000); - } else { - __ Lui(AT, 0x8000); - __ Xor(AT, AT, in_high); - } - __ Or(AT, AT, in_low); - __ Sltiu(out_low, AT, 1); - __ Move(out_high, ZERO); - } - } else { - if ((ctz_imm == 1) && !is_r6) { - __ Andi(AT, in_low, 1); - __ Sll(TMP, in_low, 31); - __ And(TMP, in_high, TMP); - __ Sra(out_high, TMP, 31); - __ Or(out_low, out_high, AT); - } else if (ctz_imm < 32) { - __ Sra(AT, in_high, 31); - if (ctz_imm <= 16) { - __ Andi(out_low, in_low, abs_imm - 1); - } else if (is_r2_or_newer) { - __ Ext(out_low, in_low, 0, ctz_imm); - } else { - __ Sll(out_low, in_low, 32 - ctz_imm); - __ Srl(out_low, out_low, 32 - ctz_imm); - } - if (is_r6) { - __ Selnez(out_high, AT, out_low); - } else { - __ Movz(AT, ZERO, out_low); - __ Move(out_high, AT); - } - if (is_r2_or_newer) { - __ Ins(out_low, out_high, ctz_imm, 32 - ctz_imm); - } else { - __ Sll(AT, out_high, ctz_imm); - __ Or(out_low, out_low, AT); - } - } else if (ctz_imm == 32) { - __ Sra(AT, in_high, 31); - __ Move(out_low, in_low); - if (is_r6) { - __ Selnez(out_high, AT, out_low); - } else { - __ Movz(AT, ZERO, out_low); - __ Move(out_high, AT); - } - } else if 
(ctz_imm < 63) { - __ Sra(AT, in_high, 31); - __ Move(TMP, in_low); - if (ctz_imm - 32 <= 16) { - __ Andi(out_high, in_high, (1 << (ctz_imm - 32)) - 1); - } else if (is_r2_or_newer) { - __ Ext(out_high, in_high, 0, ctz_imm - 32); - } else { - __ Sll(out_high, in_high, 64 - ctz_imm); - __ Srl(out_high, out_high, 64 - ctz_imm); - } - __ Move(out_low, TMP); - __ Or(TMP, TMP, out_high); - if (is_r6) { - __ Selnez(AT, AT, TMP); - } else { - __ Movz(AT, ZERO, TMP); - } - if (is_r2_or_newer) { - __ Ins(out_high, AT, ctz_imm - 32, 64 - ctz_imm); - } else { - __ Sll(AT, AT, ctz_imm - 32); - __ Or(out_high, out_high, AT); - } - } else { - if (is_r6) { - __ Aui(AT, in_high, 0x8000); - } else { - __ Lui(AT, 0x8000); - __ Xor(AT, AT, in_high); - } - __ Or(AT, AT, in_low); - __ Sltiu(AT, AT, 1); - __ Sll(AT, AT, 31); - __ Move(out_low, in_low); - __ Xor(out_high, in_high, AT); - } - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - - int64_t magic; - int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); - - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - __ LoadConst32(TMP, magic); - if (isR6) { - __ MuhR6(TMP, dividend, TMP); - } else { - __ MultR2(dividend, TMP); - __ Mfhi(TMP); - } - if (imm > 0 && magic < 0) { - __ Addu(TMP, TMP, dividend); - } else if (imm < 0 && magic > 0) { - __ Subu(TMP, TMP, dividend); - } - - if (shift != 0) { - __ Sra(TMP, TMP, shift); - } - - if (instruction->IsDiv()) { - __ Sra(out, TMP, 31); - __ Subu(out, TMP, out); - } else { - __ Sra(AT, TMP, 31); - __ Subu(AT, TMP, AT); - __ LoadConst32(TMP, imm); - if (isR6) { - __ MulR6(TMP, AT, TMP); - } else { - __ MulR2(TMP, AT, TMP); - } - __ Subu(out, dividend, TMP); - } -} - -void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); - - LocationSummary* locations = instruction->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); - Location second = locations->InAt(1); - - if (second.IsConstant()) { - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
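// Illustrative sketch (standalone C++, not ART code): the deleted
// GenerateDivRemWithAnyConstant uses the classic "magic number" transform:
// take the high 32 bits of dividend * M (MultR2/Mfhi or MuhR6), optionally add
// or subtract the dividend when the signs of the divisor and of M differ,
// shift, then add the sign bit to fix up negative results. The (M, shift) pair
// below is the standard table value for dividing by 5; it is an assumption of
// this sketch, not taken from the patch. Arithmetic right shift of negative
// values is assumed.
#include <cassert>
#include <cstdint>

static int32_t DivBy5Magic(int32_t x) {
  const int32_t kMagic = 0x66666667;  // Positive magic: no add/sub correction.
  const int kShift = 1;
  int32_t q = static_cast<int32_t>(
      (static_cast<int64_t>(x) * kMagic) >> 32);            // High word of the product.
  q >>= kShift;                                             // Sra(TMP, TMP, shift)
  q += static_cast<int32_t>(static_cast<uint32_t>(q) >> 31);  // out = TMP - Sra(TMP, 31)
  return q;
}

int main() {
  const int32_t samples[] = {0, 1, 4, 5, 6, 99, -1, -4, -5, -6, -99,
                             2147483647, -2147483647 - 1};
  for (int32_t x : samples) {
    assert(DivBy5Magic(x) == x / 5);
  }
  return 0;
}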
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(AbsOrMin(imm))) { - DivRemByPowerOfTwo(instruction); - } else { - DCHECK(imm <= -2 || imm >= 2); - GenerateDivRemWithAnyConstant(instruction); - } - } else { - Register dividend = locations->InAt(0).AsRegister<Register>(); - Register divisor = second.AsRegister<Register>(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (instruction->IsDiv()) { - if (isR6) { - __ DivR6(out, dividend, divisor); - } else { - __ DivR2(out, dividend, divisor); - } - } else { - if (isR6) { - __ ModR6(out, dividend, divisor); - } else { - __ ModR2(out, dividend, divisor); - } - } - } -} - -void LocationsBuilderMIPS::VisitDiv(HDiv* div) { - DataType::Type type = div->GetResultType(); - bool call_long_div = false; - if (type == DataType::Type::kInt64) { - if (div->InputAt(1)->IsConstant()) { - int64_t imm = CodeGenerator::GetInt64ValueOf(div->InputAt(1)->AsConstant()); - call_long_div = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); - } else { - call_long_div = true; - } - } - LocationSummary::CallKind call_kind = call_long_div - ? LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind); - - switch (type) { - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kInt64: { - if (call_long_div) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); - locations->SetOut(Location::RequiresRegister()); - } - break; - } - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected div type " << type; - } -} - -void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - GenerateDivRemIntegral(instruction); - break; - case DataType::Type::kInt64: { - if (locations->InAt(1).IsConstant()) { - int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else { - DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); - DivRemByPowerOfTwo(instruction); - } - } else { - codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); - } - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ DivS(dst, lhs, rhs); - } else { - __ DivD(dst, lhs, rhs); - } - break; - } - default: - LOG(FATAL) << "Unexpected div type " << type; - } -} - -void LocationsBuilderMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); - locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); -} - -void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { - SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - Location value = instruction->GetLocations()->InAt(0); - DataType::Type type = instruction->GetType(); - - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: { - if (value.IsConstant()) { - if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { - __ B(slow_path->GetEntryLabel()); - } else { - // A division by a non-null constant is valid. We don't need to perform - // any check, so simply fall through. - } - } else { - DCHECK(value.IsRegister()) << value; - __ Beqz(value.AsRegister<Register>(), slow_path->GetEntryLabel()); - } - break; - } - case DataType::Type::kInt64: { - if (value.IsConstant()) { - if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { - __ B(slow_path->GetEntryLabel()); - } else { - // A division by a non-null constant is valid. We don't need to perform - // any check, so simply fall through. - } - } else { - DCHECK(value.IsRegisterPair()) << value; - __ Or(TMP, value.AsRegisterPairHigh<Register>(), value.AsRegisterPairLow<Register>()); - __ Beqz(TMP, slow_path->GetEntryLabel()); - } - break; - } - default: - LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - } -} - -void LocationsBuilderMIPS::VisitDoubleConstant(HDoubleConstant* constant) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS::VisitDoubleConstant(HDoubleConstant* cst ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS::VisitExit(HExit* exit) { - exit->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} - -void LocationsBuilderMIPS::VisitFloatConstant(HFloatConstant* constant) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. 
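// Illustrative sketch (standalone C++, not ART code): for the long
// DivZeroCheck the deleted code ORs the two halves of the register pair and
// branches on the single 32-bit result, since the pair is zero exactly when
// lo | hi == 0.
#include <cassert>
#include <cstdint>

static bool IsZero64(uint32_t lo, uint32_t hi) { return (lo | hi) == 0u; }

int main() {
  assert(IsZero64(0u, 0u));
  assert(!IsZero64(1u, 0u));
  assert(!IsZero64(0u, 0x80000000u));
  return 0;
}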
-} - -void LocationsBuilderMIPS::VisitGoto(HGoto* got) { - got->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) { - if (successor->IsExitBlock()) { - DCHECK(got->GetPrevious()->AlwaysThrows()); - return; // no code needed - } - - HBasicBlock* block = got->GetBlock(); - HInstruction* previous = got->GetPrevious(); - HLoopInformation* info = block->GetLoopInformation(); - - if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ Lw(AT, SP, kCurrentMethodStackOffset); - __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); - __ Addiu(TMP, TMP, 1); - __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); - } - GenerateSuspendCheck(info->GetSuspendCheck(), successor); - return; - } - if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { - GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - } - if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); - } -} - -void InstructionCodeGeneratorMIPS::VisitGoto(HGoto* got) { - HandleGoto(got, got->GetSuccessor()); -} - -void LocationsBuilderMIPS::VisitTryBoundary(HTryBoundary* try_boundary) { - try_boundary->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary) { - HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); - if (!successor->IsExitBlock()) { - HandleGoto(try_boundary, successor); - } -} - -void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond, - LocationSummary* locations) { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Location rhs_location = locations->InAt(1); - Register rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<Register>(); - } - - switch (cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsInt<16>(-rhs_imm)) { - if (rhs_imm == 0) { - if (cond == kCondEQ) { - __ Sltiu(dst, lhs, 1); - } else { - __ Sltu(dst, ZERO, lhs); - } - } else { - __ Addiu(dst, lhs, -rhs_imm); - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - } - } else { - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - } - break; - - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } - break; - - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. 
- __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; - - case kCondB: - case kCondAE: - if (use_imm && IsInt<16>(rhs_imm)) { - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0xffff8000, 0xffffffff]. - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; - - case kCondBE: - case kCondA: - if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0xffff8000, 0xffffffff]. - __ Sltiu(dst, lhs, rhs_imm + 1); - if (cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } - } - break; - } -} - -bool InstructionCodeGeneratorMIPS::MaterializeIntCompare(IfCondition cond, - LocationSummary* input_locations, - Register dst) { - Register lhs = input_locations->InAt(0).AsRegister<Register>(); - Location rhs_location = input_locations->InAt(1); - Register rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<Register>(); - } - - switch (cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsInt<16>(-rhs_imm)) { - __ Addiu(dst, lhs, -rhs_imm); - } else if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - return (cond == kCondEQ); - - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - return (cond == kCondGE); - - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - return (cond == kCondGT); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - return (cond == kCondLE); - } - - case kCondB: - case kCondAE: - if (use_imm && IsInt<16>(rhs_imm)) { - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0xffff8000, 0xffffffff]. 
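// The slti/sltiu sequences in these compare helpers lean on two identities
// that the comments above describe; this is only a plain C++ sketch of those
// identities (illustrative names), not the deleted MIPS code:
//   (1) "lhs <= imm" can be computed as "lhs < imm + 1" as long as imm + 1
//       does not wrap to 0, and
//   (2) sltiu sign-extends its 16-bit immediate before the unsigned compare,
//       so immediates reach the unsigned ranges [0, 0x7fff] and
//       [0xffff8000, 0xffffffff].
#include <cstdint>

static bool UnsignedLessEqualViaLessThanSketch(uint32_t lhs, uint32_t imm) {
  // Valid only when imm != 0xffffffff, mirroring the (rhs_imm != -1) guard.
  return lhs < imm + 1u;  // lhs <= imm
}

static bool SltiuSemanticsSketch(uint32_t lhs, int16_t imm16) {
  // What sltiu compares against: the 16-bit immediate is sign-extended to
  // 32 bits and then treated as unsigned, e.g. -8 becomes 0xfffffff8.
  uint32_t extended = static_cast<uint32_t>(static_cast<int32_t>(imm16));
  return lhs < extended;
}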
- __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - return (cond == kCondAE); - - case kCondBE: - case kCondA: - if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0xffff8000, 0xffffffff]. - __ Sltiu(dst, lhs, rhs_imm + 1); - return (cond == kCondA); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - return (cond == kCondBE); - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, - LocationSummary* locations, - MipsLabel* label) { - Register lhs = locations->InAt(0).AsRegister<Register>(); - Location rhs_location = locations->InAt(1); - Register rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<Register>(); - } - - if (use_imm && rhs_imm == 0) { - switch (cond) { - case kCondEQ: - case kCondBE: // <= 0 if zero - __ Beqz(lhs, label); - break; - case kCondNE: - case kCondA: // > 0 if non-zero - __ Bnez(lhs, label); - break; - case kCondLT: - __ Bltz(lhs, label); - break; - case kCondGE: - __ Bgez(lhs, label); - break; - case kCondLE: - __ Blez(lhs, label); - break; - case kCondGT: - __ Bgtz(lhs, label); - break; - case kCondB: // always false - break; - case kCondAE: // always true - __ B(label); - break; - } - } else { - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (isR6 || !use_imm) { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (cond) { - case kCondEQ: - __ Beq(lhs, rhs_reg, label); - break; - case kCondNE: - __ Bne(lhs, rhs_reg, label); - break; - case kCondLT: - __ Blt(lhs, rhs_reg, label); - break; - case kCondGE: - __ Bge(lhs, rhs_reg, label); - break; - case kCondLE: - __ Bge(rhs_reg, lhs, label); - break; - case kCondGT: - __ Blt(rhs_reg, lhs, label); - break; - case kCondB: - __ Bltu(lhs, rhs_reg, label); - break; - case kCondAE: - __ Bgeu(lhs, rhs_reg, label); - break; - case kCondBE: - __ Bgeu(rhs_reg, lhs, label); - break; - case kCondA: - __ Bltu(rhs_reg, lhs, label); - break; - } - } else { - // Special cases for more efficient comparison with constants on R2. - switch (cond) { - case kCondEQ: - __ LoadConst32(TMP, rhs_imm); - __ Beq(lhs, TMP, label); - break; - case kCondNE: - __ LoadConst32(TMP, rhs_imm); - __ Bne(lhs, TMP, label); - break; - case kCondLT: - if (IsInt<16>(rhs_imm)) { - __ Slti(TMP, lhs, rhs_imm); - __ Bnez(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Blt(lhs, TMP, label); - } - break; - case kCondGE: - if (IsInt<16>(rhs_imm)) { - __ Slti(TMP, lhs, rhs_imm); - __ Beqz(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bge(lhs, TMP, label); - } - break; - case kCondLE: - if (IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(TMP, lhs, rhs_imm + 1); - __ Bnez(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bge(TMP, lhs, label); - } - break; - case kCondGT: - if (IsInt<16>(rhs_imm + 1)) { - // Simulate lhs > rhs via !(lhs < rhs + 1). 
- __ Slti(TMP, lhs, rhs_imm + 1); - __ Beqz(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Blt(TMP, lhs, label); - } - break; - case kCondB: - if (IsInt<16>(rhs_imm)) { - __ Sltiu(TMP, lhs, rhs_imm); - __ Bnez(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bltu(lhs, TMP, label); - } - break; - case kCondAE: - if (IsInt<16>(rhs_imm)) { - __ Sltiu(TMP, lhs, rhs_imm); - __ Beqz(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bgeu(lhs, TMP, label); - } - break; - case kCondBE: - if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - __ Sltiu(TMP, lhs, rhs_imm + 1); - __ Bnez(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bgeu(TMP, lhs, label); - } - break; - case kCondA: - if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs > rhs via !(lhs < rhs + 1). - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - __ Sltiu(TMP, lhs, rhs_imm + 1); - __ Beqz(TMP, label); - } else { - __ LoadConst32(TMP, rhs_imm); - __ Bltu(TMP, lhs, label); - } - break; - } - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond, - LocationSummary* locations) { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - Location rhs_location = locations->InAt(1); - Register rhs_high = ZERO; - Register rhs_low = ZERO; - int64_t imm = 0; - uint32_t imm_high = 0; - uint32_t imm_low = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); - imm_high = High32Bits(imm); - imm_low = Low32Bits(imm); - } else { - rhs_high = rhs_location.AsRegisterPairHigh<Register>(); - rhs_low = rhs_location.AsRegisterPairLow<Register>(); - } - if (use_imm && imm == 0) { - switch (cond) { - case kCondEQ: - case kCondBE: // <= 0 if zero - __ Or(dst, lhs_high, lhs_low); - __ Sltiu(dst, dst, 1); - break; - case kCondNE: - case kCondA: // > 0 if non-zero - __ Or(dst, lhs_high, lhs_low); - __ Sltu(dst, ZERO, dst); - break; - case kCondLT: - __ Slt(dst, lhs_high, ZERO); - break; - case kCondGE: - __ Slt(dst, lhs_high, ZERO); - __ Xori(dst, dst, 1); - break; - case kCondLE: - __ Or(TMP, lhs_high, lhs_low); - __ Sra(AT, lhs_high, 31); - __ Sltu(dst, AT, TMP); - __ Xori(dst, dst, 1); - break; - case kCondGT: - __ Or(TMP, lhs_high, lhs_low); - __ Sra(AT, lhs_high, 31); - __ Sltu(dst, AT, TMP); - break; - case kCondB: // always false - __ Andi(dst, dst, 0); - break; - case kCondAE: // always true - __ Ori(dst, ZERO, 1); - break; - } - } else if (use_imm) { - // TODO: more efficient comparison with constants without loading them into TMP/AT. 
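// The register-pair sequences in GenerateLongCompare/GenerateLongCompareAndBranch
// implement the usual decomposition of a 64-bit compare into 32-bit halves:
// a signed (or unsigned) compare on the high words, an unsigned compare on the
// low words when the high words are equal. A minimal sketch of that identity
// only (illustrative names, not the deleted MIPS code):
#include <cstdint>

static bool Int64LessThanSketch(int64_t a, int64_t b) {
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  // For the unsigned conditions (kCondB etc.) the high-word compare would be
  // unsigned as well; the low words are always compared unsigned.
  return (a_hi < b_hi) || (a_hi == b_hi && a_lo < b_lo);
}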
- switch (cond) { - case kCondEQ: - __ LoadConst32(TMP, imm_high); - __ Xor(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Xor(AT, AT, lhs_low); - __ Or(dst, TMP, AT); - __ Sltiu(dst, dst, 1); - break; - case kCondNE: - __ LoadConst32(TMP, imm_high); - __ Xor(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Xor(AT, AT, lhs_low); - __ Or(dst, TMP, AT); - __ Sltu(dst, ZERO, dst); - break; - case kCondLT: - case kCondGE: - if (dst == lhs_low) { - __ LoadConst32(TMP, imm_low); - __ Sltu(dst, lhs_low, TMP); - } - __ LoadConst32(TMP, imm_high); - __ Slt(AT, lhs_high, TMP); - __ Slt(TMP, TMP, lhs_high); - if (dst != lhs_low) { - __ LoadConst32(dst, imm_low); - __ Sltu(dst, lhs_low, dst); - } - __ Slt(dst, TMP, dst); - __ Or(dst, dst, AT); - if (cond == kCondGE) { - __ Xori(dst, dst, 1); - } - break; - case kCondGT: - case kCondLE: - if (dst == lhs_low) { - __ LoadConst32(TMP, imm_low); - __ Sltu(dst, TMP, lhs_low); - } - __ LoadConst32(TMP, imm_high); - __ Slt(AT, TMP, lhs_high); - __ Slt(TMP, lhs_high, TMP); - if (dst != lhs_low) { - __ LoadConst32(dst, imm_low); - __ Sltu(dst, dst, lhs_low); - } - __ Slt(dst, TMP, dst); - __ Or(dst, dst, AT); - if (cond == kCondLE) { - __ Xori(dst, dst, 1); - } - break; - case kCondB: - case kCondAE: - if (dst == lhs_low) { - __ LoadConst32(TMP, imm_low); - __ Sltu(dst, lhs_low, TMP); - } - __ LoadConst32(TMP, imm_high); - __ Sltu(AT, lhs_high, TMP); - __ Sltu(TMP, TMP, lhs_high); - if (dst != lhs_low) { - __ LoadConst32(dst, imm_low); - __ Sltu(dst, lhs_low, dst); - } - __ Slt(dst, TMP, dst); - __ Or(dst, dst, AT); - if (cond == kCondAE) { - __ Xori(dst, dst, 1); - } - break; - case kCondA: - case kCondBE: - if (dst == lhs_low) { - __ LoadConst32(TMP, imm_low); - __ Sltu(dst, TMP, lhs_low); - } - __ LoadConst32(TMP, imm_high); - __ Sltu(AT, TMP, lhs_high); - __ Sltu(TMP, lhs_high, TMP); - if (dst != lhs_low) { - __ LoadConst32(dst, imm_low); - __ Sltu(dst, dst, lhs_low); - } - __ Slt(dst, TMP, dst); - __ Or(dst, dst, AT); - if (cond == kCondBE) { - __ Xori(dst, dst, 1); - } - break; - } - } else { - switch (cond) { - case kCondEQ: - __ Xor(TMP, lhs_high, rhs_high); - __ Xor(AT, lhs_low, rhs_low); - __ Or(dst, TMP, AT); - __ Sltiu(dst, dst, 1); - break; - case kCondNE: - __ Xor(TMP, lhs_high, rhs_high); - __ Xor(AT, lhs_low, rhs_low); - __ Or(dst, TMP, AT); - __ Sltu(dst, ZERO, dst); - break; - case kCondLT: - case kCondGE: - __ Slt(TMP, rhs_high, lhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Slt(TMP, TMP, AT); - __ Slt(AT, lhs_high, rhs_high); - __ Or(dst, AT, TMP); - if (cond == kCondGE) { - __ Xori(dst, dst, 1); - } - break; - case kCondGT: - case kCondLE: - __ Slt(TMP, lhs_high, rhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Slt(TMP, TMP, AT); - __ Slt(AT, rhs_high, lhs_high); - __ Or(dst, AT, TMP); - if (cond == kCondLE) { - __ Xori(dst, dst, 1); - } - break; - case kCondB: - case kCondAE: - __ Sltu(TMP, rhs_high, lhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Slt(TMP, TMP, AT); - __ Sltu(AT, lhs_high, rhs_high); - __ Or(dst, AT, TMP); - if (cond == kCondAE) { - __ Xori(dst, dst, 1); - } - break; - case kCondA: - case kCondBE: - __ Sltu(TMP, lhs_high, rhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Slt(TMP, TMP, AT); - __ Sltu(AT, rhs_high, lhs_high); - __ Or(dst, AT, TMP); - if (cond == kCondBE) { - __ Xori(dst, dst, 1); - } - break; - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond, - LocationSummary* locations, - MipsLabel* label) { - Register lhs_high = 
locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - Location rhs_location = locations->InAt(1); - Register rhs_high = ZERO; - Register rhs_low = ZERO; - int64_t imm = 0; - uint32_t imm_high = 0; - uint32_t imm_low = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); - imm_high = High32Bits(imm); - imm_low = Low32Bits(imm); - } else { - rhs_high = rhs_location.AsRegisterPairHigh<Register>(); - rhs_low = rhs_location.AsRegisterPairLow<Register>(); - } - - if (use_imm && imm == 0) { - switch (cond) { - case kCondEQ: - case kCondBE: // <= 0 if zero - __ Or(TMP, lhs_high, lhs_low); - __ Beqz(TMP, label); - break; - case kCondNE: - case kCondA: // > 0 if non-zero - __ Or(TMP, lhs_high, lhs_low); - __ Bnez(TMP, label); - break; - case kCondLT: - __ Bltz(lhs_high, label); - break; - case kCondGE: - __ Bgez(lhs_high, label); - break; - case kCondLE: - __ Or(TMP, lhs_high, lhs_low); - __ Sra(AT, lhs_high, 31); - __ Bgeu(AT, TMP, label); - break; - case kCondGT: - __ Or(TMP, lhs_high, lhs_low); - __ Sra(AT, lhs_high, 31); - __ Bltu(AT, TMP, label); - break; - case kCondB: // always false - break; - case kCondAE: // always true - __ B(label); - break; - } - } else if (use_imm) { - // TODO: more efficient comparison with constants without loading them into TMP/AT. - switch (cond) { - case kCondEQ: - __ LoadConst32(TMP, imm_high); - __ Xor(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Xor(AT, AT, lhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondNE: - __ LoadConst32(TMP, imm_high); - __ Xor(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Xor(AT, AT, lhs_low); - __ Or(TMP, TMP, AT); - __ Bnez(TMP, label); - break; - case kCondLT: - __ LoadConst32(TMP, imm_high); - __ Blt(lhs_high, TMP, label); - __ Slt(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, lhs_low, AT); - __ Blt(TMP, AT, label); - break; - case kCondGE: - __ LoadConst32(TMP, imm_high); - __ Blt(TMP, lhs_high, label); - __ Slt(TMP, lhs_high, TMP); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, lhs_low, AT); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondLE: - __ LoadConst32(TMP, imm_high); - __ Blt(lhs_high, TMP, label); - __ Slt(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, AT, lhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondGT: - __ LoadConst32(TMP, imm_high); - __ Blt(TMP, lhs_high, label); - __ Slt(TMP, lhs_high, TMP); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, AT, lhs_low); - __ Blt(TMP, AT, label); - break; - case kCondB: - __ LoadConst32(TMP, imm_high); - __ Bltu(lhs_high, TMP, label); - __ Sltu(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, lhs_low, AT); - __ Blt(TMP, AT, label); - break; - case kCondAE: - __ LoadConst32(TMP, imm_high); - __ Bltu(TMP, lhs_high, label); - __ Sltu(TMP, lhs_high, TMP); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, lhs_low, AT); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondBE: - __ LoadConst32(TMP, imm_high); - __ Bltu(lhs_high, TMP, label); - __ Sltu(TMP, TMP, lhs_high); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, AT, lhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondA: - __ LoadConst32(TMP, imm_high); - __ Bltu(TMP, lhs_high, label); - __ Sltu(TMP, lhs_high, TMP); - __ LoadConst32(AT, imm_low); - __ Sltu(AT, AT, lhs_low); - __ Blt(TMP, AT, label); - break; - } 
- } else { - switch (cond) { - case kCondEQ: - __ Xor(TMP, lhs_high, rhs_high); - __ Xor(AT, lhs_low, rhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondNE: - __ Xor(TMP, lhs_high, rhs_high); - __ Xor(AT, lhs_low, rhs_low); - __ Or(TMP, TMP, AT); - __ Bnez(TMP, label); - break; - case kCondLT: - __ Blt(lhs_high, rhs_high, label); - __ Slt(TMP, rhs_high, lhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Blt(TMP, AT, label); - break; - case kCondGE: - __ Blt(rhs_high, lhs_high, label); - __ Slt(TMP, lhs_high, rhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondLE: - __ Blt(lhs_high, rhs_high, label); - __ Slt(TMP, rhs_high, lhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondGT: - __ Blt(rhs_high, lhs_high, label); - __ Slt(TMP, lhs_high, rhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Blt(TMP, AT, label); - break; - case kCondB: - __ Bltu(lhs_high, rhs_high, label); - __ Sltu(TMP, rhs_high, lhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Blt(TMP, AT, label); - break; - case kCondAE: - __ Bltu(rhs_high, lhs_high, label); - __ Sltu(TMP, lhs_high, rhs_high); - __ Sltu(AT, lhs_low, rhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondBE: - __ Bltu(lhs_high, rhs_high, label); - __ Sltu(TMP, rhs_high, lhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Or(TMP, TMP, AT); - __ Beqz(TMP, label); - break; - case kCondA: - __ Bltu(rhs_high, lhs_high, label); - __ Sltu(TMP, lhs_high, rhs_high); - __ Sltu(AT, rhs_low, lhs_low); - __ Blt(TMP, AT, label); - break; - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations) { - Register dst = locations->Out().AsRegister<Register>(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (type == DataType::Type::kFloat32) { - if (isR6) { - switch (cond) { - case kCondEQ: - __ CmpEqS(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondNE: - __ CmpEqS(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Addiu(dst, dst, 1); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtS(FTMP, lhs, rhs); - } else { - __ CmpUltS(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeS(FTMP, lhs, rhs); - } else { - __ CmpUleS(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltS(FTMP, rhs, lhs); - } else { - __ CmpLtS(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleS(FTMP, rhs, lhs); - } else { - __ CmpLeS(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } else { - switch (cond) { - case kCondEQ: - __ CeqS(0, lhs, rhs); - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondNE: - __ CeqS(0, lhs, rhs); - __ LoadConst32(dst, 1); - __ Movt(dst, ZERO, 0); - break; - case kCondLT: - if (gt_bias) { - __ ColtS(0, lhs, rhs); - } else { - __ CultS(0, lhs, rhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondLE: - if (gt_bias) { - __ ColeS(0, lhs, rhs); - } else { - __ CuleS(0, lhs, rhs); - } - 
__ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondGT: - if (gt_bias) { - __ CultS(0, rhs, lhs); - } else { - __ ColtS(0, rhs, lhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondGE: - if (gt_bias) { - __ CuleS(0, rhs, lhs); - } else { - __ ColeS(0, rhs, lhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - if (isR6) { - switch (cond) { - case kCondEQ: - __ CmpEqD(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondNE: - __ CmpEqD(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Addiu(dst, dst, 1); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtD(FTMP, lhs, rhs); - } else { - __ CmpUltD(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeD(FTMP, lhs, rhs); - } else { - __ CmpUleD(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltD(FTMP, rhs, lhs); - } else { - __ CmpLtD(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleD(FTMP, rhs, lhs); - } else { - __ CmpLeD(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } else { - switch (cond) { - case kCondEQ: - __ CeqD(0, lhs, rhs); - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondNE: - __ CeqD(0, lhs, rhs); - __ LoadConst32(dst, 1); - __ Movt(dst, ZERO, 0); - break; - case kCondLT: - if (gt_bias) { - __ ColtD(0, lhs, rhs); - } else { - __ CultD(0, lhs, rhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondLE: - if (gt_bias) { - __ ColeD(0, lhs, rhs); - } else { - __ CuleD(0, lhs, rhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondGT: - if (gt_bias) { - __ CultD(0, rhs, lhs); - } else { - __ ColtD(0, rhs, lhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - case kCondGE: - if (gt_bias) { - __ CuleD(0, rhs, lhs); - } else { - __ ColeD(0, rhs, lhs); - } - __ LoadConst32(dst, 1); - __ Movf(dst, ZERO, 0); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } - } -} - -bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - int cc) { - FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); - CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); - if (type == DataType::Type::kFloat32) { - switch (cond) { - case kCondEQ: - __ CeqS(cc, lhs, rhs); - return false; - case kCondNE: - __ CeqS(cc, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ ColtS(cc, lhs, rhs); - } else { - __ CultS(cc, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ ColeS(cc, lhs, rhs); - } else { - __ CuleS(cc, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CultS(cc, rhs, lhs); - } else { - __ ColtS(cc, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CuleS(cc, rhs, lhs); - } else { - __ ColeS(cc, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point 
condition"; - UNREACHABLE(); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - switch (cond) { - case kCondEQ: - __ CeqD(cc, lhs, rhs); - return false; - case kCondNE: - __ CeqD(cc, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ ColtD(cc, lhs, rhs); - } else { - __ CultD(cc, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ ColeD(cc, lhs, rhs); - } else { - __ CuleD(cc, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CultD(cc, rhs, lhs); - } else { - __ ColtD(cc, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CuleD(cc, rhs, lhs); - } else { - __ ColeD(cc, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } -} - -bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - FRegister dst) { - FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); - CHECK(codegen_->GetInstructionSetFeatures().IsR6()); - if (type == DataType::Type::kFloat32) { - switch (cond) { - case kCondEQ: - __ CmpEqS(dst, lhs, rhs); - return false; - case kCondNE: - __ CmpEqS(dst, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ CmpLtS(dst, lhs, rhs); - } else { - __ CmpUltS(dst, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ CmpLeS(dst, lhs, rhs); - } else { - __ CmpUleS(dst, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CmpUltS(dst, rhs, lhs); - } else { - __ CmpLtS(dst, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CmpUleS(dst, rhs, lhs); - } else { - __ CmpLeS(dst, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - switch (cond) { - case kCondEQ: - __ CmpEqD(dst, lhs, rhs); - return false; - case kCondNE: - __ CmpEqD(dst, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ CmpLtD(dst, lhs, rhs); - } else { - __ CmpUltD(dst, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ CmpLeD(dst, lhs, rhs); - } else { - __ CmpUleD(dst, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CmpUltD(dst, rhs, lhs); - } else { - __ CmpLtD(dst, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CmpUleD(dst, rhs, lhs); - } else { - __ CmpLeD(dst, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations, - MipsLabel* label) { - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (type == DataType::Type::kFloat32) { - if (isR6) { - switch (cond) { - case kCondEQ: - __ CmpEqS(FTMP, lhs, rhs); - __ Bc1nez(FTMP, label); - break; - case kCondNE: - __ CmpEqS(FTMP, lhs, rhs); - __ Bc1eqz(FTMP, label); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtS(FTMP, lhs, rhs); - } else { - __ CmpUltS(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeS(FTMP, lhs, rhs); - } else { - __ CmpUleS(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); 
- break; - case kCondGT: - if (gt_bias) { - __ CmpUltS(FTMP, rhs, lhs); - } else { - __ CmpLtS(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleS(FTMP, rhs, lhs); - } else { - __ CmpLeS(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } else { - switch (cond) { - case kCondEQ: - __ CeqS(0, lhs, rhs); - __ Bc1t(0, label); - break; - case kCondNE: - __ CeqS(0, lhs, rhs); - __ Bc1f(0, label); - break; - case kCondLT: - if (gt_bias) { - __ ColtS(0, lhs, rhs); - } else { - __ CultS(0, lhs, rhs); - } - __ Bc1t(0, label); - break; - case kCondLE: - if (gt_bias) { - __ ColeS(0, lhs, rhs); - } else { - __ CuleS(0, lhs, rhs); - } - __ Bc1t(0, label); - break; - case kCondGT: - if (gt_bias) { - __ CultS(0, rhs, lhs); - } else { - __ ColtS(0, rhs, lhs); - } - __ Bc1t(0, label); - break; - case kCondGE: - if (gt_bias) { - __ CuleS(0, rhs, lhs); - } else { - __ ColeS(0, rhs, lhs); - } - __ Bc1t(0, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - if (isR6) { - switch (cond) { - case kCondEQ: - __ CmpEqD(FTMP, lhs, rhs); - __ Bc1nez(FTMP, label); - break; - case kCondNE: - __ CmpEqD(FTMP, lhs, rhs); - __ Bc1eqz(FTMP, label); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtD(FTMP, lhs, rhs); - } else { - __ CmpUltD(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeD(FTMP, lhs, rhs); - } else { - __ CmpUleD(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltD(FTMP, rhs, lhs); - } else { - __ CmpLtD(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleD(FTMP, rhs, lhs); - } else { - __ CmpLeD(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } else { - switch (cond) { - case kCondEQ: - __ CeqD(0, lhs, rhs); - __ Bc1t(0, label); - break; - case kCondNE: - __ CeqD(0, lhs, rhs); - __ Bc1f(0, label); - break; - case kCondLT: - if (gt_bias) { - __ ColtD(0, lhs, rhs); - } else { - __ CultD(0, lhs, rhs); - } - __ Bc1t(0, label); - break; - case kCondLE: - if (gt_bias) { - __ ColeD(0, lhs, rhs); - } else { - __ CuleD(0, lhs, rhs); - } - __ Bc1t(0, label); - break; - case kCondGT: - if (gt_bias) { - __ CultD(0, rhs, lhs); - } else { - __ ColtD(0, rhs, lhs); - } - __ Bc1t(0, label); - break; - case kCondGE: - if (gt_bias) { - __ CuleD(0, rhs, lhs); - } else { - __ ColeD(0, rhs, lhs); - } - __ Bc1t(0, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction, - size_t condition_input_index, - MipsLabel* true_target, - MipsLabel* false_target) { - HInstruction* cond = instruction->InputAt(condition_input_index); - - if (true_target == nullptr && false_target == nullptr) { - // Nothing to do. The code always falls through. - return; - } else if (cond->IsIntConstant()) { - // Constant condition, statically compared against "true" (integer value 1). 
- if (cond->AsIntConstant()->IsTrue()) { - if (true_target != nullptr) { - __ B(true_target); - } - } else { - DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); - if (false_target != nullptr) { - __ B(false_target); - } - } - return; - } - - // The following code generates these patterns: - // (1) true_target == nullptr && false_target != nullptr - // - opposite condition true => branch to false_target - // (2) true_target != nullptr && false_target == nullptr - // - condition true => branch to true_target - // (3) true_target != nullptr && false_target != nullptr - // - condition true => branch to true_target - // - branch to false_target - if (IsBooleanValueOrMaterializedCondition(cond)) { - // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(condition_input_index); - DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { - __ Beqz(cond_val.AsRegister<Register>(), false_target); - } else { - __ Bnez(cond_val.AsRegister<Register>(), true_target); - } - } else { - // The condition instruction has not been materialized, use its inputs as - // the comparison and its condition as the branch condition. - HCondition* condition = cond->AsCondition(); - DataType::Type type = condition->InputAt(0)->GetType(); - LocationSummary* locations = cond->GetLocations(); - IfCondition if_cond = condition->GetCondition(); - MipsLabel* branch_target = true_target; - - if (true_target == nullptr) { - if_cond = condition->GetOppositeCondition(); - branch_target = false_target; - } - - switch (type) { - default: - GenerateIntCompareAndBranch(if_cond, locations, branch_target); - break; - case DataType::Type::kInt64: - GenerateLongCompareAndBranch(if_cond, locations, branch_target); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); - break; - } - } - - // If neither branch falls through (case 3), the conditional branch to `true_target` - // was already emitted (case 2) and we need to emit a jump to `false_target`. - if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); - } -} - -void LocationsBuilderMIPS::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); - if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { - HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); - HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? - nullptr : codegen_->GetLabelOf(true_successor); - MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
- nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); -} - -void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) - LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - InvokeRuntimeCallingConvention calling_convention; - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); - if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = - deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); - GenerateTestAndBranch(deoptimize, - /* condition_input_index= */ 0, - slow_path->GetEntryLabel(), - /* false_target= */ nullptr); -} - -// This function returns true if a conditional move can be generated for HSelect. -// Otherwise it returns false and HSelect must be implemented in terms of conditonal -// branches and regular moves. -// -// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect. -// -// While determining feasibility of a conditional move and setting inputs/outputs -// are two distinct tasks, this function does both because they share quite a bit -// of common logic. -static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) { - bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* i= */ 2); - HCondition* condition = cond->AsCondition(); - - DataType::Type cond_type = - materialized ? DataType::Type::kInt32 : condition->InputAt(0)->GetType(); - DataType::Type dst_type = select->GetType(); - - HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); - HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); - bool is_true_value_zero_constant = - (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern()); - bool is_false_value_zero_constant = - (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern()); - - bool can_move_conditionally = false; - bool use_const_for_false_in = false; - bool use_const_for_true_in = false; - - if (!cond->IsConstant()) { - switch (cond_type) { - default: - switch (dst_type) { - default: - // Moving int on int condition. - if (is_r6) { - if (is_true_value_zero_constant) { - // seleqz out_reg, false_reg, cond_reg - can_move_conditionally = true; - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // selnez out_reg, true_reg, cond_reg - can_move_conditionally = true; - use_const_for_false_in = true; - } else if (materialized) { - // Not materializing unmaterialized int conditions - // to keep the instruction count low. - // selnez AT, true_reg, cond_reg - // seleqz TMP, false_reg, cond_reg - // or out_reg, AT, TMP - can_move_conditionally = true; - } - } else { - // movn out_reg, true_reg/ZERO, cond_reg - can_move_conditionally = true; - use_const_for_true_in = is_true_value_zero_constant; - } - break; - case DataType::Type::kInt64: - // Moving long on int condition. 
- if (is_r6) { - if (is_true_value_zero_constant) { - // seleqz out_reg_lo, false_reg_lo, cond_reg - // seleqz out_reg_hi, false_reg_hi, cond_reg - can_move_conditionally = true; - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // selnez out_reg_lo, true_reg_lo, cond_reg - // selnez out_reg_hi, true_reg_hi, cond_reg - can_move_conditionally = true; - use_const_for_false_in = true; - } - // Other long conditional moves would generate 6+ instructions, - // which is too many. - } else { - // movn out_reg_lo, true_reg_lo/ZERO, cond_reg - // movn out_reg_hi, true_reg_hi/ZERO, cond_reg - can_move_conditionally = true; - use_const_for_true_in = is_true_value_zero_constant; - } - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - // Moving float/double on int condition. - if (is_r6) { - if (materialized) { - // Not materializing unmaterialized int conditions - // to keep the instruction count low. - can_move_conditionally = true; - if (is_true_value_zero_constant) { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // seleqz.fmt out_reg, false_reg, temp_cond_reg - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // selnez.fmt out_reg, true_reg, temp_cond_reg - use_const_for_false_in = true; - } else { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // sel.fmt temp_cond_reg, false_reg, true_reg - // mov.fmt out_reg, temp_cond_reg - } - } - } else { - // movn.fmt out_reg, true_reg, cond_reg - can_move_conditionally = true; - } - break; - } - break; - case DataType::Type::kInt64: - // We don't materialize long comparison now - // and use conditional branches instead. - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - switch (dst_type) { - default: - // Moving int on float/double condition. - if (is_r6) { - if (is_true_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // seleqz out_reg, false_reg, TMP - can_move_conditionally = true; - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // selnez out_reg, true_reg, TMP - can_move_conditionally = true; - use_const_for_false_in = true; - } else { - // mfc1 TMP, temp_cond_reg - // selnez AT, true_reg, TMP - // seleqz TMP, false_reg, TMP - // or out_reg, AT, TMP - can_move_conditionally = true; - } - } else { - // movt out_reg, true_reg/ZERO, cc - can_move_conditionally = true; - use_const_for_true_in = is_true_value_zero_constant; - } - break; - case DataType::Type::kInt64: - // Moving long on float/double condition. - if (is_r6) { - if (is_true_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // seleqz out_reg_lo, false_reg_lo, TMP - // seleqz out_reg_hi, false_reg_hi, TMP - can_move_conditionally = true; - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // selnez out_reg_lo, true_reg_lo, TMP - // selnez out_reg_hi, true_reg_hi, TMP - can_move_conditionally = true; - use_const_for_false_in = true; - } - // Other long conditional moves would generate 6+ instructions, - // which is too many. - } else { - // movt out_reg_lo, true_reg_lo/ZERO, cc - // movt out_reg_hi, true_reg_hi/ZERO, cc - can_move_conditionally = true; - use_const_for_true_in = is_true_value_zero_constant; - } - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - // Moving float/double on float/double condition. 
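// The R6 selnez/seleqz pairs annotated above compose a branch-free select from
// two masked operands or-ed together. A sketch of that composition, with
// helpers modelling the R6 instruction semantics (illustrative names, not the
// deleted MIPS code):
#include <cstdint>

static uint32_t Selnez(uint32_t rs, uint32_t rt) { return (rt != 0) ? rs : 0; }
static uint32_t Seleqz(uint32_t rs, uint32_t rt) { return (rt == 0) ? rs : 0; }

static uint32_t SelectSketch(uint32_t cond, uint32_t true_val, uint32_t false_val) {
  // selnez AT, true_reg, cond_reg
  // seleqz TMP, false_reg, cond_reg
  // or     out_reg, AT, TMP
  return Selnez(true_val, cond) | Seleqz(false_val, cond);
}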
- if (is_r6) { - can_move_conditionally = true; - if (is_true_value_zero_constant) { - // seleqz.fmt out_reg, false_reg, temp_cond_reg - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // selnez.fmt out_reg, true_reg, temp_cond_reg - use_const_for_false_in = true; - } else { - // sel.fmt temp_cond_reg, false_reg, true_reg - // mov.fmt out_reg, temp_cond_reg - } - } else { - // movt.fmt out_reg, true_reg, cc - can_move_conditionally = true; - } - break; - } - break; - } - } - - if (can_move_conditionally) { - DCHECK(!use_const_for_false_in || !use_const_for_true_in); - } else { - DCHECK(!use_const_for_false_in); - DCHECK(!use_const_for_true_in); - } - - if (locations_to_set != nullptr) { - if (use_const_for_false_in) { - locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); - } else { - locations_to_set->SetInAt(0, - DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } - if (use_const_for_true_in) { - locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); - } else { - locations_to_set->SetInAt(1, - DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } - if (materialized) { - locations_to_set->SetInAt(2, Location::RequiresRegister()); - } - // On R6 we don't require the output to be the same as the - // first input for conditional moves unlike on R2. - bool is_out_same_as_first_in = !can_move_conditionally || !is_r6; - if (is_out_same_as_first_in) { - locations_to_set->SetOut(Location::SameAsFirstInput()); - } else { - locations_to_set->SetOut(DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } - } - - return can_move_conditionally; -} - -void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { - LocationSummary* locations = select->GetLocations(); - Location dst = locations->Out(); - Location src = locations->InAt(1); - Register src_reg = ZERO; - Register src_reg_high = ZERO; - HInstruction* cond = select->InputAt(/* i= */ 2); - Register cond_reg = TMP; - int cond_cc = 0; - DataType::Type cond_type = DataType::Type::kInt32; - bool cond_inverted = false; - DataType::Type dst_type = select->GetType(); - - if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); - } else { - HCondition* condition = cond->AsCondition(); - LocationSummary* cond_locations = cond->GetLocations(); - IfCondition if_cond = condition->GetCondition(); - cond_type = condition->InputAt(0)->GetType(); - switch (cond_type) { - default: - DCHECK_NE(cond_type, DataType::Type::kInt64); - cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - cond_inverted = MaterializeFpCompareR2(if_cond, - condition->IsGtBias(), - cond_type, - cond_locations, - cond_cc); - break; - } - } - - DCHECK(dst.Equals(locations->InAt(0))); - if (src.IsRegister()) { - src_reg = src.AsRegister<Register>(); - } else if (src.IsRegisterPair()) { - src_reg = src.AsRegisterPairLow<Register>(); - src_reg_high = src.AsRegisterPairHigh<Register>(); - } else if (src.IsConstant()) { - DCHECK(src.GetConstant()->IsZeroBitPattern()); - } - - switch (cond_type) { - default: - switch (dst_type) { - default: - if (cond_inverted) { - __ Movz(dst.AsRegister<Register>(), src_reg, cond_reg); - } else { - __ Movn(dst.AsRegister<Register>(), src_reg, 
cond_reg); - } - break; - case DataType::Type::kInt64: - if (cond_inverted) { - __ Movz(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg); - __ Movz(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); - } else { - __ Movn(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg); - __ Movn(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); - } - break; - case DataType::Type::kFloat32: - if (cond_inverted) { - __ MovzS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); - } else { - __ MovnS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); - } - break; - case DataType::Type::kFloat64: - if (cond_inverted) { - __ MovzD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); - } else { - __ MovnD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); - } - break; - } - break; - case DataType::Type::kInt64: - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - switch (dst_type) { - default: - if (cond_inverted) { - __ Movf(dst.AsRegister<Register>(), src_reg, cond_cc); - } else { - __ Movt(dst.AsRegister<Register>(), src_reg, cond_cc); - } - break; - case DataType::Type::kInt64: - if (cond_inverted) { - __ Movf(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc); - __ Movf(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); - } else { - __ Movt(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc); - __ Movt(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); - } - break; - case DataType::Type::kFloat32: - if (cond_inverted) { - __ MovfS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); - } else { - __ MovtS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); - } - break; - case DataType::Type::kFloat64: - if (cond_inverted) { - __ MovfD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); - } else { - __ MovtD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); - } - break; - } - break; - } -} - -void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { - LocationSummary* locations = select->GetLocations(); - Location dst = locations->Out(); - Location false_src = locations->InAt(0); - Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* i= */ 2); - Register cond_reg = TMP; - FRegister fcond_reg = FTMP; - DataType::Type cond_type = DataType::Type::kInt32; - bool cond_inverted = false; - DataType::Type dst_type = select->GetType(); - - if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); - } else { - HCondition* condition = cond->AsCondition(); - LocationSummary* cond_locations = cond->GetLocations(); - IfCondition if_cond = condition->GetCondition(); - cond_type = condition->InputAt(0)->GetType(); - switch (cond_type) { - default: - DCHECK_NE(cond_type, DataType::Type::kInt64); - cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - cond_inverted = MaterializeFpCompareR6(if_cond, - condition->IsGtBias(), - cond_type, - cond_locations, - fcond_reg); - break; - } - } - - if (true_src.IsConstant()) { - DCHECK(true_src.GetConstant()->IsZeroBitPattern()); - } - if (false_src.IsConstant()) { - DCHECK(false_src.GetConstant()->IsZeroBitPattern()); - } - - switch (dst_type) { - default: - if 
(DataType::IsFloatingPointType(cond_type)) { - __ Mfc1(cond_reg, fcond_reg); - } - if (true_src.IsConstant()) { - if (cond_inverted) { - __ Selnez(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg); - } else { - __ Seleqz(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg); - } - } else if (false_src.IsConstant()) { - if (cond_inverted) { - __ Seleqz(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg); - } else { - __ Selnez(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg); - } - } else { - DCHECK_NE(cond_reg, AT); - if (cond_inverted) { - __ Seleqz(AT, true_src.AsRegister<Register>(), cond_reg); - __ Selnez(TMP, false_src.AsRegister<Register>(), cond_reg); - } else { - __ Selnez(AT, true_src.AsRegister<Register>(), cond_reg); - __ Seleqz(TMP, false_src.AsRegister<Register>(), cond_reg); - } - __ Or(dst.AsRegister<Register>(), AT, TMP); - } - break; - case DataType::Type::kInt64: { - if (DataType::IsFloatingPointType(cond_type)) { - __ Mfc1(cond_reg, fcond_reg); - } - Register dst_lo = dst.AsRegisterPairLow<Register>(); - Register dst_hi = dst.AsRegisterPairHigh<Register>(); - if (true_src.IsConstant()) { - Register src_lo = false_src.AsRegisterPairLow<Register>(); - Register src_hi = false_src.AsRegisterPairHigh<Register>(); - if (cond_inverted) { - __ Selnez(dst_lo, src_lo, cond_reg); - __ Selnez(dst_hi, src_hi, cond_reg); - } else { - __ Seleqz(dst_lo, src_lo, cond_reg); - __ Seleqz(dst_hi, src_hi, cond_reg); - } - } else { - DCHECK(false_src.IsConstant()); - Register src_lo = true_src.AsRegisterPairLow<Register>(); - Register src_hi = true_src.AsRegisterPairHigh<Register>(); - if (cond_inverted) { - __ Seleqz(dst_lo, src_lo, cond_reg); - __ Seleqz(dst_hi, src_hi, cond_reg); - } else { - __ Selnez(dst_lo, src_lo, cond_reg); - __ Selnez(dst_hi, src_hi, cond_reg); - } - } - break; - } - case DataType::Type::kFloat32: { - if (!DataType::IsFloatingPointType(cond_type)) { - // sel*.fmt tests bit 0 of the condition register, account for that. - __ Sltu(TMP, ZERO, cond_reg); - __ Mtc1(TMP, fcond_reg); - } - FRegister dst_reg = dst.AsFpuRegister<FRegister>(); - if (true_src.IsConstant()) { - FRegister src_reg = false_src.AsFpuRegister<FRegister>(); - if (cond_inverted) { - __ SelnezS(dst_reg, src_reg, fcond_reg); - } else { - __ SeleqzS(dst_reg, src_reg, fcond_reg); - } - } else if (false_src.IsConstant()) { - FRegister src_reg = true_src.AsFpuRegister<FRegister>(); - if (cond_inverted) { - __ SeleqzS(dst_reg, src_reg, fcond_reg); - } else { - __ SelnezS(dst_reg, src_reg, fcond_reg); - } - } else { - if (cond_inverted) { - __ SelS(fcond_reg, - true_src.AsFpuRegister<FRegister>(), - false_src.AsFpuRegister<FRegister>()); - } else { - __ SelS(fcond_reg, - false_src.AsFpuRegister<FRegister>(), - true_src.AsFpuRegister<FRegister>()); - } - __ MovS(dst_reg, fcond_reg); - } - break; - } - case DataType::Type::kFloat64: { - if (!DataType::IsFloatingPointType(cond_type)) { - // sel*.fmt tests bit 0 of the condition register, account for that. 
- __ Sltu(TMP, ZERO, cond_reg); - __ Mtc1(TMP, fcond_reg); - } - FRegister dst_reg = dst.AsFpuRegister<FRegister>(); - if (true_src.IsConstant()) { - FRegister src_reg = false_src.AsFpuRegister<FRegister>(); - if (cond_inverted) { - __ SelnezD(dst_reg, src_reg, fcond_reg); - } else { - __ SeleqzD(dst_reg, src_reg, fcond_reg); - } - } else if (false_src.IsConstant()) { - FRegister src_reg = true_src.AsFpuRegister<FRegister>(); - if (cond_inverted) { - __ SeleqzD(dst_reg, src_reg, fcond_reg); - } else { - __ SelnezD(dst_reg, src_reg, fcond_reg); - } - } else { - if (cond_inverted) { - __ SelD(fcond_reg, - true_src.AsFpuRegister<FRegister>(), - false_src.AsFpuRegister<FRegister>()); - } else { - __ SelD(fcond_reg, - false_src.AsFpuRegister<FRegister>(), - true_src.AsFpuRegister<FRegister>()); - } - __ MovD(dst_reg, fcond_reg); - } - break; - } - } -} - -void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) - LocationSummary(flag, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - __ LoadFromOffset(kLoadWord, - flag->GetLocations()->Out().AsRegister<Register>(), - SP, - codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); -} - -void LocationsBuilderMIPS::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); - CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations); -} - -void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { - bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (CanMoveConditionally(select, is_r6, /* locations_to_set= */ nullptr)) { - if (is_r6) { - GenConditionalMoveR6(select); - } else { - GenConditionalMoveR2(select); - } - } else { - LocationSummary* locations = select->GetLocations(); - MipsLabel false_target; - GenerateTestAndBranch(select, - /* condition_input_index= */ 2, - /* true_target= */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); - } -} - -void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); -} - -void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. -} - -void CodeGeneratorMIPS::GenerateNop() { - __ Nop(); -} - -void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DataType::Type field_type = field_info.GetFieldType(); - bool is_wide = (field_type == DataType::Type::kInt64) || (field_type == DataType::Type::kFloat64); - bool generate_volatile = field_info.IsVolatile() && is_wide; - bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_type == DataType::Type::kReference); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, - generate_volatile - ? LocationSummary::kCallOnMainOnly - : (object_field_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall)); - - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
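// Volatile Int64/Float64 field gets are routed to a main-only runtime call
// below because a 32-bit core register pair cannot load 64 bits indivisibly;
// semantically the kQuickA64Load entrypoint stands in for an atomic 64-bit
// load. A sketch of that semantic contract only, assuming the field is viewed
// as a std::atomic<int64_t> (not the entrypoint's actual implementation):
#include <atomic>
#include <cstdint>

static int64_t VolatileWideLoadSketch(const std::atomic<int64_t>* field) {
  // One indivisible 64-bit read; two separate 32-bit loads could observe a
  // torn value if another thread stores the field concurrently. Acquire
  // ordering corresponds to the kLoadAny barrier emitted after volatile loads.
  return field->load(std::memory_order_acquire);
}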
- } - locations->SetInAt(0, Location::RequiresRegister()); - if (generate_volatile) { - InvokeRuntimeCallingConvention calling_convention; - // need A0 to hold base + offset - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - if (field_type == DataType::Type::kInt64) { - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt64)); - } else { - // Use Location::Any() to prevent situations when running out of available fp registers. - locations->SetOut(Location::Any()); - // Need some temp core regs since FP results are returned in core registers - Location reg = calling_convention.GetReturnLocation(DataType::Type::kInt64); - locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>())); - locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairHigh<Register>())); - } - } else { - if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - // The output overlaps in the case of an object field get with - // read barriers enabled: we do not want the move to overwrite the - // object's location, as we need it to emit the read barrier. - locations->SetOut(Location::RequiresRegister(), - object_field_get_with_read_barrier - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - } - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. - if (!kBakerReadBarrierThunksEnableForFields) { - locations->AddTemp(Location::RequiresRegister()); - } - } - } -} - -void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, - const FieldInfo& field_info, - uint32_t dex_pc) { - DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - Register obj = obj_loc.AsRegister<Register>(); - Location dst_loc = locations->Out(); - LoadOperandType load_type = kLoadUnsignedByte; - bool is_volatile = field_info.IsVolatile(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - load_type = kLoadUnsignedByte; - break; - case DataType::Type::kInt8: - load_type = kLoadSignedByte; - break; - case DataType::Type::kUint16: - load_type = kLoadUnsignedHalfword; - break; - case DataType::Type::kInt16: - load_type = kLoadSignedHalfword; - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - case DataType::Type::kReference: - load_type = kLoadWord; - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - load_type = kLoadDoubleword; - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } - - if (is_volatile && load_type == kLoadDoubleword) { - InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); - // Do implicit Null check - __ LoadFromOffset(kLoadWord, - ZERO, - locations->GetTemp(0).AsRegister<Register>(), - 0, - null_checker); - codegen_->InvokeRuntime(kQuickA64Load, instruction, dex_pc); - CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const 
int64_t*>(); - if (type == DataType::Type::kFloat64) { - // FP results are returned in core registers. Need to move them. - if (dst_loc.IsFpuRegister()) { - __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>()); - __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - dst_loc.AsFpuRegister<FRegister>()); - } else { - DCHECK(dst_loc.IsDoubleStackSlot()); - __ StoreToOffset(kStoreWord, - locations->GetTemp(1).AsRegister<Register>(), - SP, - dst_loc.GetStackIndex()); - __ StoreToOffset(kStoreWord, - locations->GetTemp(2).AsRegister<Register>(), - SP, - dst_loc.GetStackIndex() + 4); - } - } - } else { - if (type == DataType::Type::kReference) { - // /* HeapReference<Object> */ dst = *(obj + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = - kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); - // Note that a potential implicit null check is handled in this - // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - dst_loc, - obj, - offset, - temp_loc, - /* needs_null_check= */ true); - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - } else { - __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker); - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). - codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); - } - } else if (!DataType::IsFloatingPointType(type)) { - Register dst; - if (type == DataType::Type::kInt64) { - DCHECK(dst_loc.IsRegisterPair()); - dst = dst_loc.AsRegisterPairLow<Register>(); - } else { - DCHECK(dst_loc.IsRegister()); - dst = dst_loc.AsRegister<Register>(); - } - __ LoadFromOffset(load_type, dst, obj, offset, null_checker); - } else { - DCHECK(dst_loc.IsFpuRegister()); - FRegister dst = dst_loc.AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ LoadSFromOffset(dst, obj, offset, null_checker); - } else { - __ LoadDFromOffset(dst, obj, offset, null_checker); - } - } - } - - // Memory barriers, in the case of references, are handled in the - // previous switch statement. - if (is_volatile && (type != DataType::Type::kReference)) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } -} - -void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { - DataType::Type field_type = field_info.GetFieldType(); - bool is_wide = (field_type == DataType::Type::kInt64) || (field_type == DataType::Type::kFloat64); - bool generate_volatile = field_info.IsVolatile() && is_wide; - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, generate_volatile ? 
LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); - - locations->SetInAt(0, Location::RequiresRegister()); - if (generate_volatile) { - InvokeRuntimeCallingConvention calling_convention; - // need A0 to hold base + offset - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - if (field_type == DataType::Type::kInt64) { - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - } else { - // Use Location::Any() to prevent situations when running out of available fp registers. - locations->SetInAt(1, Location::Any()); - // Pass FP parameters in core registers. - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3))); - } - } else { - if (DataType::IsFloatingPointType(field_type)) { - locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); - } else { - locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); - } - } -} - -void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, - const FieldInfo& field_info, - uint32_t dex_pc, - bool value_can_be_null) { - DataType::Type type = field_info.GetFieldType(); - LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); - Location value_location = locations->InAt(1); - StoreOperandType store_type = kStoreByte; - bool is_volatile = field_info.IsVolatile(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - store_type = kStoreByte; - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - store_type = kStoreHalfword; - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - case DataType::Type::kReference: - store_type = kStoreWord; - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - store_type = kStoreDoubleword; - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); - } - - if (is_volatile && store_type == kStoreDoubleword) { - InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); - // Do implicit Null check. - __ LoadFromOffset(kLoadWord, - ZERO, - locations->GetTemp(0).AsRegister<Register>(), - 0, - null_checker); - if (type == DataType::Type::kFloat64) { - // Pass FP parameters in core registers. 
- if (value_location.IsFpuRegister()) { - __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), - value_location.AsFpuRegister<FRegister>()); - __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - value_location.AsFpuRegister<FRegister>()); - } else if (value_location.IsDoubleStackSlot()) { - __ LoadFromOffset(kLoadWord, - locations->GetTemp(1).AsRegister<Register>(), - SP, - value_location.GetStackIndex()); - __ LoadFromOffset(kLoadWord, - locations->GetTemp(2).AsRegister<Register>(), - SP, - value_location.GetStackIndex() + 4); - } else { - DCHECK(value_location.IsConstant()); - DCHECK(value_location.GetConstant()->IsDoubleConstant()); - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(), - locations->GetTemp(1).AsRegister<Register>(), - value); - } - } - codegen_->InvokeRuntime(kQuickA64Store, instruction, dex_pc); - CheckEntrypointTypes<kQuickA64Store, void, volatile int64_t *, int64_t>(); - } else { - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); - } else if (!DataType::IsFloatingPointType(type)) { - Register src; - if (type == DataType::Type::kInt64) { - src = value_location.AsRegisterPairLow<Register>(); - } else { - src = value_location.AsRegister<Register>(); - } - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(type, DataType::Type::kReference); - __ PoisonHeapReference(TMP, src); - __ StoreToOffset(store_type, TMP, obj, offset, null_checker); - } else { - __ StoreToOffset(store_type, src, obj, offset, null_checker); - } - } else { - FRegister src = value_location.AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ StoreSToOffset(src, obj, offset, null_checker); - } else { - __ StoreDToOffset(src, obj, offset, null_checker); - } - } - } - - if (needs_write_barrier) { - Register src = value_location.AsRegister<Register>(); - codegen_->MarkGCCard(obj, src, value_can_be_null); - } - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } -} - -void LocationsBuilderMIPS::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); -} - -void LocationsBuilderMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, - instruction->GetFieldInfo(), - instruction->GetDexPc(), - instruction->GetValueCanBeNull()); -} - -void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( - HInstruction* instruction, - Location out, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option) { - Register out_reg = out.AsRegister<Register>(); - if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); - if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; - } - if (kUseBakerReadBarrier) { - // Load with fast path 
based Baker's read barrier. - // /* HeapReference<Object> */ out = *(out + offset) - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out, - out_reg, - offset, - maybe_temp, - /* needs_null_check= */ false); - } else { - // Load with slow path based read barrier. - // Save the value of `out` into `maybe_temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - __ Move(maybe_temp.AsRegister<Register>(), out_reg); - // /* HeapReference<Object> */ out = *(out + offset) - __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); - codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); - } - } else { - // Plain load with no read barrier. - // /* HeapReference<Object> */ out = *(out + offset) - __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); - __ MaybeUnpoisonHeapReference(out_reg); - } -} - -void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( - HInstruction* instruction, - Location out, - Location obj, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option) { - Register out_reg = out.AsRegister<Register>(); - Register obj_reg = obj.AsRegister<Register>(); - if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); - if (kUseBakerReadBarrier) { - if (!kBakerReadBarrierThunksEnableForFields) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; - } - // Load with fast path based Baker's read barrier. - // /* HeapReference<Object> */ out = *(obj + offset) - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out, - obj_reg, - offset, - maybe_temp, - /* needs_null_check= */ false); - } else { - // Load with slow path based read barrier. - // /* HeapReference<Object> */ out = *(obj + offset) - __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); - codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); - } - } else { - // Plain load with no read barrier. - // /* HeapReference<Object> */ out = *(obj + offset) - __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); - __ MaybeUnpoisonHeapReference(out_reg); - } -} - -static inline int GetBakerMarkThunkNumber(Register reg) { - static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 21, "Expecting equal"); - if (reg >= V0 && reg <= T7) { // 14 consecutive regs. - return reg - V0; - } else if (reg >= S2 && reg <= S7) { // 6 consecutive regs. - return 14 + (reg - S2); - } else if (reg == FP) { // One more. - return 20; - } - LOG(FATAL) << "Unexpected register " << reg; - UNREACHABLE(); -} - -static inline int GetBakerMarkFieldArrayThunkDisplacement(Register reg, bool short_offset) { - int num = GetBakerMarkThunkNumber(reg) + - (short_offset ?
BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); - return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; -} - -static inline int GetBakerMarkGcRootThunkDisplacement(Register reg) { - return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + - BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; -} - -void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - MipsLabel* label_low) { - bool reordering; - if (label_low != nullptr) { - DCHECK_EQ(offset, 0x5678u); - } - Register root_reg = root.AsRegister<Register>(); - if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); - if (kUseBakerReadBarrier) { - // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used: - if (kBakerReadBarrierThunksEnableForGcRoots) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { - // temp = &gc_root_thunk<root_reg> - // root = temp(root) - // } - - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); - const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); - int16_t offset_low = Low16Bits(offset); - int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign - // extension in lw. - bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); - Register base = short_offset ? obj : TMP; - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); - reordering = __ SetReorder(false); - if (!short_offset) { - DCHECK(!label_low); - __ AddUpper(base, obj, offset_high); - } - MipsLabel skip_call; - __ Beqz(T9, &skip_call, /* is_bare= */ true); - if (label_low != nullptr) { - DCHECK(short_offset); - __ Bind(label_low); - } - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, base, offset_low); // Single instruction - // in delay slot. - if (isR6) { - __ Jialc(T9, thunk_disp); - } else { - __ Addiu(T9, T9, thunk_disp); - __ Jalr(T9); - __ Nop(); - } - __ Bind(&skip_call); - __ SetReorder(reordering); - } else { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (T9) the read barrier mark entry point corresponding - // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` - // is false, and vice versa. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - if (label_low != nullptr) { - reordering = __ SetReorder(false); - __ Bind(label_low); - } - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - if (label_low != nullptr) { - __ SetReorder(reordering); - } - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); - - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } - } else { - if (label_low != nullptr) { - reordering = __ SetReorder(false); - __ Bind(label_low); - } - // GC root loaded through a slow path for read barriers other - // than Baker's. - // /* GcRoot<mirror::Object>* */ root = obj + offset - __ Addiu32(root_reg, obj, offset); - if (label_low != nullptr) { - __ SetReorder(reordering); - } - // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); - } - } else { - if (label_low != nullptr) { - reordering = __ SetReorder(false); - __ Bind(label_low); - } - // Plain GC root load with no read barrier. - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - // Note that GC roots are not affected by heap poisoning, thus we - // do not have to unpoison `root_reg` here. - if (label_low != nullptr) { - __ SetReorder(reordering); - } - } -} - -void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - if (kBakerReadBarrierThunksEnableForFields) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. If the holder is not gray, - // it issues a load-load memory barrier and returns to the original - // reference load. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. 
- // if (temp != nullptr) { - // temp = &field_array_thunk<holder_reg> - // temp() - // } - // not_gray_return_address: - // // If the offset is too large to fit into the lw instruction, we - // // use an adjusted base register (TMP) here. This register - // // receives bits 16 ... 31 of the offset before the thunk invocation - // // and the thunk benefits from it. - // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. - // gray_return_address: - - DCHECK(temp.IsInvalid()); - bool isR6 = GetInstructionSetFeatures().IsR6(); - int16_t offset_low = Low16Bits(offset); - int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lw. - bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); - bool reordering = __ SetReorder(false); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); - // There may or may not have been a null check if the field offset is smaller than - // the page size. - // There must've been a null check in case it's actually a load from an array. - // We will, however, perform an explicit null check in the thunk as it's easier to - // do it than not. - if (instruction->IsArrayGet()) { - DCHECK(!needs_null_check); - } - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); - Register ref_reg = ref.AsRegister<Register>(); - Register base = short_offset ? obj : TMP; - MipsLabel skip_call; - if (short_offset) { - if (isR6) { - __ Beqzc(T9, &skip_call, /* is_bare= */ true); - __ Nop(); // In forbidden slot. - __ Jialc(T9, thunk_disp); - } else { - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Addiu(T9, T9, thunk_disp); // In delay slot. - __ Jalr(T9); - __ Nop(); // In delay slot. - } - __ Bind(&skip_call); - } else { - if (isR6) { - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Aui(base, obj, offset_high); // In delay slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - } else { - __ Lui(base, offset_high); - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Addiu(T9, T9, thunk_disp); // In delay slot. - __ Jalr(T9); - __ Bind(&skip_call); - __ Addu(base, base, obj); // In delay slot. - } - } - // /* HeapReference<Object> */ ref = *(obj + offset) - __ LoadFromOffset(kLoadWord, ref_reg, base, offset_low); // Single instruction.
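// A hypothetical worked example of the offset_low/offset_high split used above (the value
// 0x19234 is illustrative, not from the sources): Low16Bits(0x19234) yields 0x9234, which
// lw sign-extends to -0x6DCC, so High16Bits(offset - offset_low) yields 0x0002 rather than
// 0x0001; materializing 0x0002 << 16 into the base (Aui, or Lui + Addu on R2) and then
// loading with immediate 0x9234 addresses base + 0x19234 exactly, compensating for the
// sign extension.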
- if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - __ MaybeUnpoisonHeapReference(ref_reg); - __ SetReorder(reordering); - return; - } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Location no_index = Location::NoLocation(); - ScaleFactor no_scale_factor = TIMES_1; - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - offset, - no_index, - no_scale_factor, - temp, - needs_null_check); -} - -void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t data_offset, - Location index, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - ScaleFactor scale_factor = TIMES_4; - - if (kBakerReadBarrierThunksEnableForArrays) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. If the holder is not gray, - // it issues a load-load memory barrier and returns to the original - // reference load. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. - // if (temp != nullptr) { - // temp = &field_array_thunk<holder_reg> - // temp() - // } - // not_gray_return_address: - // // The element address is pre-calculated in the TMP register before the - // // thunk invocation and the thunk benefits from it. - // HeapReference<mirror::Object> reference = data[index]; // Original reference load. - // gray_return_address: - - DCHECK(temp.IsInvalid()); - DCHECK(index.IsValid()); - bool reordering = __ SetReorder(false); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); - // We will not do the explicit null check in the thunk as some form of a null check - // must've been done earlier. - DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); - Register ref_reg = ref.AsRegister<Register>(); - Register index_reg = index.IsRegisterPair() - ? index.AsRegisterPairLow<Register>() - : index.AsRegister<Register>(); - MipsLabel skip_call; - if (GetInstructionSetFeatures().IsR6()) { - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - } else { - __ Sll(TMP, index_reg, scale_factor); - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Addiu(T9, T9, thunk_disp); // In delay slot. - __ Jalr(T9); - __ Bind(&skip_call); - __ Addu(TMP, TMP, obj); // In delay slot. - } - // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; - __ LoadFromOffset(kLoadWord, ref_reg, TMP, data_offset); // Single instruction. 
- __ MaybeUnpoisonHeapReference(ref_reg); - __ SetReorder(reordering); - return; - } - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - data_offset, - index, - scale_factor, - temp, - needs_null_check); -} - -void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check, - bool always_update_field) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - // In slow path based read barriers, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - Register ref_reg = ref.AsRegister<Register>(); - Register temp_reg = temp.AsRegister<Register>(); - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - - // /* int32_t */ monitor = obj->monitor_ - __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - __ Sync(0); // Barrier to prevent load-load reordering. - - // The actual reference load. - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - size_t computed_offset = - (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; - __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); - } else { - // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics, which use a register pair as index ("long - // offset"), of which only the low part contains data. - Register index_reg = index.IsRegisterPair() - ? index.AsRegisterPairLow<Register>() - : index.AsRegister<Register>(); - __ ShiftAndAdd(TMP, index_reg, obj, scale_factor, TMP); - __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset); - } - } else { - // /* HeapReference<Object> */ ref = *(obj + offset) - __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); - } - - // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(ref_reg); - - // Slow path marking the object `ref` when it is gray. 
- SlowPathCodeMIPS* slow_path; - if (always_update_field) { - // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address - // of the form `obj + field_offset`, where `obj` is a register and - // `field_offset` is a register pair (of which only the lower half - // is used). Thus `offset` and `scale_factor` above are expected - // to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetScopedAllocator()) - ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, - ref, - obj, - /* field_offset= */ index, - temp_reg); - } else { - slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS(instruction, ref); - } - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit into the sign bit (31) and - // performing a branch on less than zero. - static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); - __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); - __ Bltz(temp_reg, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { - DCHECK(kEmitCompilerReadBarrier); - - // Insert a slow path based read barrier *after* the reference load. - // - // If heap poisoning is enabled, the unpoisoning of the loaded - // reference will be carried out by the runtime within the slow - // path. - // - // Note that `ref` currently does not get unpoisoned (when heap - // poisoning is enabled), which is alright as the `ref` argument is - // not used by the artReadBarrierSlow entry point. - // - // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCodeMIPS* slow_path = new (GetScopedAllocator()) - ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index); - AddSlowPath(slow_path); - - __ B(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { - if (kEmitCompilerReadBarrier) { - // Baker's read barriers shall be handled by the fast path - // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier). - DCHECK(!kUseBakerReadBarrier); - // If heap poisoning is enabled, unpoisoning will be taken care of - // by the runtime within the slow path. - GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); - } else if (kPoisonHeapReferences) { - __ UnpoisonHeapReference(out.AsRegister<Register>()); - } -} - -void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction, - Location out, - Location root) { - DCHECK(kEmitCompilerReadBarrier); - - // Insert a slow path based read barrier *after* the GC root load. - // - // Note that GC roots are not affected by heap poisoning, so we do - // not need to do anything special for this here. 
- SlowPathCodeMIPS* slow_path = - new (GetScopedAllocator()) ReadBarrierForRootSlowPathMIPS(instruction, out, root); - AddSlowPath(slow_path); - - __ B(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - bool baker_read_barrier_slow_path = false; - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); - call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; - break; - } - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - case TypeCheckKind::kBitstringCheck: - break; - } - - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); - if (baker_read_barrier_slow_path) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // The output does overlap inputs. - // Note that TypeCheckSlowPathMIPS uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); -} - -void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - Register obj = obj_loc.AsRegister<Register>(); - Location cls = locations->InAt(1); - Location out_loc = locations->Out(); - Register out = out_loc.AsRegister<Register>(); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); - DCHECK_LE(num_temps, 1u); - Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - MipsLabel done; - SlowPathCodeMIPS* slow_path = nullptr; - - // Return 0 if `obj` is null. - // Avoid this check if we know `obj` is not null. 
- if (instruction->MustDoNullCheck()) { - __ Move(out, ZERO); - __ Beqz(obj, &done); - } - - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls.AsRegister<Register>()); - __ Sltiu(out, out, 1); - break; - } - - case TypeCheckKind::kAbstractClassCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // If the class is abstract, we eagerly fetch the super class of the - // object to avoid doing a comparison we know will fail. - MipsLabel loop; - __ Bind(&loop); - // /* HeapReference<Class> */ out = out->super_class_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - super_offset, - maybe_temp_loc, - read_barrier_option); - // If `out` is null, we use it for the result, and jump to `done`. - __ Beqz(out, &done); - __ Bne(out, cls.AsRegister<Register>(), &loop); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kClassHierarchyCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Walk over the class hierarchy to find a match. - MipsLabel loop, success; - __ Bind(&loop); - __ Beq(out, cls.AsRegister<Register>(), &success); - // /* HeapReference<Class> */ out = out->super_class_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - super_offset, - maybe_temp_loc, - read_barrier_option); - __ Bnez(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ B(&done); - __ Bind(&success); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kArrayObjectCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Do an exact check. - MipsLabel success; - __ Beq(out, cls.AsRegister<Register>(), &success); - // Otherwise, we need to check that the object's class is a non-primitive array. - // /* HeapReference<Class> */ out = out->component_type_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - component_offset, - maybe_temp_loc, - read_barrier_option); - // If `out` is null, we use it for the result, and jump to `done`. - __ Beqz(out, &done); - __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Sltiu(out, out, 1); - __ B(&done); - __ Bind(&success); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kArrayCheck: { - // No read barrier since the slow path will retry upon failure. 
- // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal= */ false); - codegen_->AddSlowPath(slow_path); - __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: { - // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. - // - // We cannot directly call the InstanceofNonTrivial runtime - // entry point without resorting to a type checking slow path - // here (i.e. by calling InvokeRuntime directly), as it would - // require to assign fixed registers for the inputs of this - // HInstanceOf instruction (following the runtime calling - // convention), which might be cluttered by the potential first - // read barrier emission at the beginning of this method. - // - // TODO: Introduce a new runtime entry point taking the object - // to test (instead of its class) as argument, and let it deal - // with the read barrier issues. This will let us refactor this - // case of the `switch` code as it was previously (with a direct - // call to the runtime not using a type checking slow path). - // This should also be beneficial for the other cases above. - DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal= */ false); - codegen_->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ Sltiu(out, out, 1); - break; - } - } - - __ Bind(&done); - - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } -} - -void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS::VisitNullConstant(HNullConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS::HandleInvoke(HInvoke* invoke) { - InvokeDexCallingConventionVisitorMIPS calling_convention_visitor; - CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); -} - -void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { - HandleInvoke(invoke); - // The register T7 is required to be used for the hidden argument in - // art_quick_imt_conflict_trampoline, so add the hidden argument. 
- invoke->GetLocations()->AddTemp(Location::RegisterLocation(T7)); -} - -void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { - // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - Location receiver = invoke->GetLocations()->InAt(0); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); - __ LoadFromOffset(kLoadWord, temp, temp, class_offset); - } else { - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); - } - codegen_->MaybeRecordImplicitNullCheck(invoke); - // Instead of simply (possibly) unpoisoning `temp` here, we should - // emit a read barrier for the previous class reference load. - // However this is not required in practice, as this is an - // intermediate/temporary reference and because the current - // concurrent copying collector keeps the from-space memory - // intact/accessible until the end of the marking phase (the - // concurrent copying collector may not in the future). - __ MaybeUnpoisonHeapReference(temp); - __ LoadFromOffset(kLoadWord, temp, temp, - mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kMipsPointerSize)); - // temp = temp->GetImtEntryAt(method_offset); - __ LoadFromOffset(kLoadWord, temp, temp, method_offset); - // T9 = temp->GetEntryPoint(); - __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); - // Set the hidden argument. - __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); - // T9(); - __ Jalr(T9); - __ NopIfNoReordering(); - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); -} - -void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderMIPS intrinsic(codegen_); - if (intrinsic.TryDispatch(invoke)) { - return; - } - - HandleInvoke(invoke); -} - -void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been pruned by - // art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - - bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); - bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6 && !has_irreducible_loops; - - IntrinsicLocationsBuilderMIPS intrinsic(codegen_); - if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && has_extra_input) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); - } - return; - } - - HandleInvoke(invoke); - - // Add the extra input register if either the dex cache array base register - // or the PC-relative base register for accessing literals is needed. 
- if (has_extra_input) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); - } -} - -void LocationsBuilderMIPS::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { - HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { - codegen_->GenerateInvokePolymorphicCall(invoke); -} - -void LocationsBuilderMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { - HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { - codegen_->GenerateInvokeCustomCall(invoke); -} - -static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { - if (invoke->GetLocations()->Intrinsified()) { - IntrinsicCodeGeneratorMIPS intrinsic(codegen); - intrinsic.Dispatch(invoke); - return true; - } - return false; -} - -HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) { - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageRelRo: - case HLoadString::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); - break; - case HLoadString::LoadKind::kJitBootImageAddress: - case HLoadString::LoadKind::kJitTableAddress: - DCHECK(Runtime::Current()->UseJitCompilation()); - break; - case HLoadString::LoadKind::kRuntimeCall: - break; - } - return desired_string_load_kind; -} - -HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) { - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kInvalid: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - case HLoadClass::LoadKind::kReferrersClass: - break; - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageRelRo: - case HLoadClass::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); - break; - case HLoadClass::LoadKind::kJitBootImageAddress: - case HLoadClass::LoadKind::kJitTableAddress: - DCHECK(Runtime::Current()->UseJitCompilation()); - break; - case HLoadClass::LoadKind::kRuntimeCall: - break; - } - return desired_class_load_kind; -} - -Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, - Register temp) { - CHECK(!GetInstructionSetFeatures().IsR6()); - CHECK(!GetGraph()->HasIrreducibleLoops()); - CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - if (!invoke->GetLocations()->Intrinsified()) { - return location.AsRegister<Register>(); - } - // For intrinsics we allow any location, so it may be on the stack. - if (!location.IsRegister()) { - __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); - return temp; - } - // For register locations, check if the register was saved. If so, get it from the stack. - // Note: There is a chance that the register was saved but not overwritten, so we could - // save one load. However, since this is just an intrinsic slow path we prefer this - // simple and more robust approach rather than trying to determine if that's the case. - SlowPathCode* slow_path = GetCurrentSlowPath(); - DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
- if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { - int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); - __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); - return temp; - } - return location.AsRegister<Register>(); -} - -HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { - return desired_dispatch_info; -} - -void CodeGeneratorMIPS::GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { - // All registers are assumed to be correctly set up per the calling convention. - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. - HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); - bool is_r6 = GetInstructionSetFeatures().IsR6(); - bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6 && !has_irreducible_loops) - ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) - : ZERO; - - switch (method_load_kind) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { - // temp = thread->string_init_entrypoint - uint32_t offset = - GetThreadOffset<kMipsPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); - __ LoadFromOffset(kLoadWord, - temp.AsRegister<Register>(), - TR, - offset); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - break; - case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { - DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* info_high = NewBootImageMethodPatch(invoke->GetTargetMethod()); - PcRelativePatchInfo* info_low = - NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); - Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Addiu(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { - uint32_t boot_image_offset = GetBootImageOffset(invoke); - PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); - PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); - Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); - PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); - Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: - __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); - break; - case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { - 
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); - return; // No code pointer retrieval; the runtime performs the call directly. - } - } - - switch (code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Bal(&frame_entry_label_); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: - // T9 = callee_method->entry_point_from_quick_compiled_code_; - __ LoadFromOffset(kLoadWord, - T9, - callee_method.AsRegister<Register>(), - ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kMipsPointerSize).Int32Value()); - // T9() - __ Jalr(T9); - __ NopIfNoReordering(); - break; - } - RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); - - DCHECK(!IsLeafMethod()); -} - -void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been pruned by - // art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - - LocationSummary* locations = invoke->GetLocations(); - codegen_->GenerateStaticOrDirectCall(invoke, - locations->HasTemps() - ? locations->GetTemp(0) - : Location::NoLocation()); -} - -void CodeGeneratorMIPS::GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { - // Use the calling convention instead of the location of the receiver, as - // intrinsics may have put the receiver in a different register. In the intrinsics - // slow path, the arguments have been moved to the right place, so here we are - // guaranteed that the receiver is the first register of the calling convention. - InvokeDexCallingConvention calling_convention; - Register receiver = calling_convention.GetRegisterAt(0); - - Register temp = temp_location.AsRegister<Register>(); - size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( - invoke->GetVTableIndex(), kMipsPointerSize).SizeValue(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - - // temp = object->GetClass(); - __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); - MaybeRecordImplicitNullCheck(invoke); - // Instead of simply (possibly) unpoisoning `temp` here, we should - // emit a read barrier for the previous class reference load. - // However this is not required in practice, as this is an - // intermediate/temporary reference and because the current - // concurrent copying collector keeps the from-space memory - // intact/accessible until the end of the marking phase (the - // concurrent copying collector may not in the future). 
- __ MaybeUnpoisonHeapReference(temp); - // temp = temp->GetMethodAt(method_offset); - __ LoadFromOffset(kLoadWord, temp, temp, method_offset); - // T9 = temp->GetEntryPoint(); - __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); - // T9(); - __ Jalr(T9); - __ NopIfNoReordering(); - RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); -} - -void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); -} - -void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { - HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); - return; - } - DCHECK(!cls->NeedsAccessCheck()); - const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); - if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - switch (load_kind) { - // We need an extra register for PC-relative literals on R2. - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageRelRo: - case HLoadClass::LoadKind::kBssEntry: - case HLoadClass::LoadKind::kJitBootImageAddress: - if (isR6) { - break; - } - if (has_irreducible_loops) { - if (load_kind != HLoadClass::LoadKind::kJitBootImageAddress) { - codegen_->ClobberRA(); - } - break; - } - FALLTHROUGH_INTENDED; - case HLoadClass::LoadKind::kReferrersClass: - locations->SetInAt(0, Location::RequiresRegister()); - break; - default: - break; - } - locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { - // Rely on the type resolution or initialization and marking to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barriers we have a temp-clobbering call. - } - } -} - -// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not -// move. 
-void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { - HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { - codegen_->GenerateLoadClassRuntimeCall(cls); - return; - } - DCHECK(!cls->NeedsAccessCheck()); - - LocationSummary* locations = cls->GetLocations(); - Location out_loc = locations->Out(); - Register out = out_loc.AsRegister<Register>(); - Register base_or_current_method_reg; - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - switch (load_kind) { - // We need an extra register for PC-relative literals on R2. - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageRelRo: - case HLoadClass::LoadKind::kBssEntry: - case HLoadClass::LoadKind::kJitBootImageAddress: - base_or_current_method_reg = - (isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>(); - break; - case HLoadClass::LoadKind::kReferrersClass: - case HLoadClass::LoadKind::kRuntimeCall: - base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); - break; - default: - base_or_current_method_reg = ZERO; - break; - } - - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : kCompilerReadBarrierOption; - bool generate_null_check = false; - switch (load_kind) { - case HLoadClass::LoadKind::kReferrersClass: { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad(cls, - out_loc, - base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); - break; - } - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - out, - base_or_current_method_reg); - __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); - break; - } - case HLoadClass::LoadKind::kBootImageRelRo: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageRelRoPatch(boot_image_offset); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - out, - base_or_current_method_reg); - __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); - break; - } - case HLoadClass::LoadKind::kBssEntry: { - CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high = - codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, - out, - base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, - out_loc, - out, - /* offset= */ 0x5678, - read_barrier_option, - &info_low->label); - generate_null_check = true; - break; - } - 
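// Illustrative sketch (plain C++; not the actual ART patcher): how the 0x1234/0x5678
// placeholders in the lui/addiu (or lui/lw) pairs above are later filled in. The 32-bit
// target is split into 16-bit halves; because addiu/lw sign-extend their immediate, the
// high half is incremented whenever bit 15 of the low half is set, so that
// (hi << 16) + int16_t(lo) reproduces the original value.
#include <cstdint>

struct HiLo {
  uint16_t hi;
  uint16_t lo;
};

inline HiLo SplitForHiLoPair(uint32_t address) {
  uint16_t lo = static_cast<uint16_t>(address & 0xFFFFu);
  uint16_t hi = static_cast<uint16_t>((address >> 16) + ((lo & 0x8000u) != 0 ? 1u : 0u));
  return {hi, lo};
}
// Example: 0x12348000 splits into hi = 0x1235, lo = 0x8000, since
// (0x1235 << 16) + int16_t(0x8000) = 0x12350000 - 0x8000 = 0x12348000.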
case HLoadClass::LoadKind::kJitBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - break; - } - case HLoadClass::LoadKind::kJitTableAddress: { - CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootClassPatch(cls->GetDexFile(), - cls->GetTypeIndex(), - cls->GetClass()); - bool reordering = __ SetReorder(false); - __ Bind(&info->high_label); - __ Lui(out, /* imm16= */ 0x1234); - __ SetReorder(reordering); - GenerateGcRootFieldLoad(cls, - out_loc, - out, - /* offset= */ 0x5678, - read_barrier_option, - &info->low_label); - break; - } - case HLoadClass::LoadKind::kRuntimeCall: - case HLoadClass::LoadKind::kInvalid: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } - - if (generate_null_check || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(cls, cls); - codegen_->AddSlowPath(slow_path); - if (generate_null_check) { - __ Beqz(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } - } -} - -void LocationsBuilderMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); -} - -void InstructionCodeGeneratorMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { - codegen_->GenerateLoadMethodHandleRuntimeCall(load); -} - -void LocationsBuilderMIPS::VisitLoadMethodType(HLoadMethodType* load) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); -} - -void InstructionCodeGeneratorMIPS::VisitLoadMethodType(HLoadMethodType* load) { - codegen_->GenerateLoadMethodTypeRuntimeCall(load); -} - -static int32_t GetExceptionTlsOffset() { - return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value(); -} - -void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitLoadException(HLoadException* load) { - Register out = load->GetLocations()->Out().AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, TR, GetExceptionTlsOffset()); -} - -void LocationsBuilderMIPS::VisitClearException(HClearException* clear) { - new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); -} - -void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { - __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset()); -} - -void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); - HLoadString::LoadKind load_kind = 
load->GetLoadKind(); - const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); - switch (load_kind) { - // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageRelRo: - case HLoadString::LoadKind::kBssEntry: - case HLoadString::LoadKind::kJitBootImageAddress: - if (isR6) { - break; - } - if (has_irreducible_loops) { - if (load_kind != HLoadString::LoadKind::kJitBootImageAddress) { - codegen_->ClobberRA(); - } - break; - } - FALLTHROUGH_INTENDED; - // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kRuntimeCall: - locations->SetInAt(0, Location::RequiresRegister()); - break; - default: - break; - } - if (load_kind == HLoadString::LoadKind::kRuntimeCall) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } else { - locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { - // Rely on the pResolveString and marking to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barriers we have a temp-clobbering call. - } - } - } -} - -// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not -// move. -void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { - HLoadString::LoadKind load_kind = load->GetLoadKind(); - LocationSummary* locations = load->GetLocations(); - Location out_loc = locations->Out(); - Register out = out_loc.AsRegister<Register>(); - Register base_or_current_method_reg; - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - switch (load_kind) { - // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageRelRo: - case HLoadString::LoadKind::kBssEntry: - case HLoadString::LoadKind::kJitBootImageAddress: - base_or_current_method_reg = - (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); - break; - default: - base_or_current_method_reg = ZERO; - break; - } - - switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - out, - base_or_current_method_reg); - __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); - return; - } - case HLoadString::LoadKind::kBootImageRelRo: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageRelRoPatch(boot_image_offset); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - out, - base_or_current_method_reg); - __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); - return; - } - case HLoadString::LoadKind::kBssEntry: { - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - out, - base_or_current_method_reg); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* offset= */ 0x5678, - kCompilerReadBarrierOption, - &info_low->label); - SlowPathCodeMIPS* slow_path = - new (codegen_->GetScopedAllocator()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - return; - } - case HLoadString::LoadKind::kJitBootImageAddress: { - uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - return; - } - case HLoadString::LoadKind::kJitTableAddress: { - CodeGeneratorMIPS::JitPatchInfo* info = - codegen_->NewJitRootStringPatch(load->GetDexFile(), - load->GetStringIndex(), - load->GetString()); - bool reordering = __ SetReorder(false); - __ Bind(&info->high_label); - __ Lui(out, /* imm16= */ 0x1234); - __ SetReorder(reordering); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* offset= */ 0x5678, - kCompilerReadBarrierOption, - &info->low_label); - return; - } - default: - break; - } - - // TODO: Re-add the compiler code to do string dex cache lookup again. 
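// Illustrative sketch (plain C++ with assumed names; not ART internals) of the .bss-entry
// protocol the kBssEntry path above relies on: compiled code loads a GC root from a
// per-oat .bss slot, and a null value diverts to a slow path that resolves the string and
// fills the slot so later executions skip the runtime call.
#include <atomic>

namespace sketch {
std::atomic<void*> string_bss_slot{nullptr};   // assumption: one slot per dex string index

void* ResolveStringAtRuntime() {               // stand-in for the pResolveString entrypoint
  static int resolved_string = 0;              // placeholder for the resolved object
  string_bss_slot.store(&resolved_string, std::memory_order_release);
  return &resolved_string;
}

inline void* LoadStringViaBssEntry() {
  void* root = string_bss_slot.load(std::memory_order_acquire);
  if (root == nullptr) {
    root = ResolveStringAtRuntime();           // slow path; also populates the slot
  }
  return root;
}
}  // namespace sketch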
- DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(calling_convention.GetRegisterAt(0), out); - __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); - codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); -} - -void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void InstructionCodeGeneratorMIPS::VisitMonitorOperation(HMonitorOperation* instruction) { - if (instruction->IsEnter()) { - codegen_->InvokeRuntime(kQuickLockObject, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); - } else { - codegen_->InvokeRuntime(kQuickUnlockObject, instruction, instruction->GetDexPc()); - } - CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); -} - -void LocationsBuilderMIPS::VisitMul(HMul* mul) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); - switch (mul->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - switch (type) { - case DataType::Type::kInt32: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - - if (isR6) { - __ MulR6(dst, lhs, rhs); - } else { - __ MulR2(dst, lhs, rhs); - } - break; - } - case DataType::Type::kInt64: { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); - Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>(); - - // Extra checks to protect caused by the existance of A1_A2. 
- // The algorithm is wrong if dst_high is either lhs_lo or rhs_lo: - // (e.g. lhs=a0_a1, rhs=a2_a3 and dst=a1_a2). - DCHECK_NE(dst_high, lhs_low); - DCHECK_NE(dst_high, rhs_low); - - // A_B * C_D - // dst_hi: [ low(A*D) + low(B*C) + hi(B*D) ] - // dst_lo: [ low(B*D) ] - // Note: R2 and R6 MUL produce the low 32 bit of the multiplication result. - - if (isR6) { - __ MulR6(TMP, lhs_high, rhs_low); - __ MulR6(dst_high, lhs_low, rhs_high); - __ Addu(dst_high, dst_high, TMP); - __ MuhuR6(TMP, lhs_low, rhs_low); - __ Addu(dst_high, dst_high, TMP); - __ MulR6(dst_low, lhs_low, rhs_low); - } else { - __ MulR2(TMP, lhs_high, rhs_low); - __ MulR2(dst_high, lhs_low, rhs_high); - __ Addu(dst_high, dst_high, TMP); - __ MultuR2(lhs_low, rhs_low); - __ Mfhi(TMP); - __ Addu(dst_high, dst_high, TMP); - __ Mflo(dst_low); - } - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ MulS(dst, lhs, rhs); - } else { - __ MulD(dst, lhs, rhs); - } - break; - } - default: - LOG(FATAL) << "Unexpected mul type " << type; - } -} - -void LocationsBuilderMIPS::VisitNeg(HNeg* neg) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); - switch (neg->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: { - Register dst = locations->Out().AsRegister<Register>(); - Register src = locations->InAt(0).AsRegister<Register>(); - __ Subu(dst, ZERO, src); - break; - } - case DataType::Type::kInt64: { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); - __ Subu(dst_low, ZERO, src_low); - __ Sltu(TMP, ZERO, dst_low); - __ Subu(dst_high, ZERO, src_high); - __ Subu(dst_high, dst_high, TMP); - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - if (type == DataType::Type::kFloat32) { - __ NegS(dst, src); - } else { - __ NegD(dst, src); - } - break; - } - default: - LOG(FATAL) << "Unexpected neg type " << type; - } -} - -void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - 
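// Illustrative sketch (plain C++) of the 32x32->64-bit decomposition used by VisitMul
// above for long multiplication with 32-bit registers. Splitting a = (A:B) and c = (C:D)
// into 32-bit halves, the low 64 bits of a * c are:
//   lo = low32(B*D)
//   hi = low32(A*D) + low32(B*C) + high32(B*D)
#include <cstdint>

inline void Mul64From32(uint32_t a_hi, uint32_t a_lo,
                        uint32_t c_hi, uint32_t c_lo,
                        uint32_t* out_hi, uint32_t* out_lo) {
  uint64_t bd = static_cast<uint64_t>(a_lo) * c_lo;                        // B*D, full 64 bits
  uint32_t hi = static_cast<uint32_t>(static_cast<uint64_t>(a_hi) * c_lo)  // low32(A*D)
              + static_cast<uint32_t>(static_cast<uint64_t>(a_lo) * c_hi)  // low32(B*C)
              + static_cast<uint32_t>(bd >> 32);                           // high32(B*D)
  *out_lo = static_cast<uint32_t>(bd);
  *out_hi = hi;
}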
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); -} - -void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. - QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); - codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); - DCHECK(!codegen_->IsLeafMethod()); -} - -void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); -} - -void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); -} - -void LocationsBuilderMIPS::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS::VisitNot(HNot* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: { - Register dst = locations->Out().AsRegister<Register>(); - Register src = locations->InAt(0).AsRegister<Register>(); - __ Nor(dst, src, ZERO); - break; - } - - case DataType::Type::kInt64: { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); - __ Nor(dst_high, src_high, ZERO); - __ Nor(dst_low, src_low, ZERO); - break; - } - - default: - LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); - } -} - -void LocationsBuilderMIPS::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = instruction->GetLocations(); - __ Xori(locations->Out().AsRegister<Register>(), - locations->InAt(0).AsRegister<Register>(), - 1); -} - -void LocationsBuilderMIPS::VisitNullCheck(HNullCheck* instruction) { - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); - locations->SetInAt(0, Location::RequiresRegister()); -} - -void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) { - if (CanMoveNullCheckToUser(instruction)) { - return; - } - Location obj = 
instruction->GetLocations()->InAt(0); - - __ Lw(ZERO, obj.AsRegister<Register>(), 0); - RecordPcInfo(instruction, instruction->GetDexPc()); -} - -void CodeGeneratorMIPS::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCodeMIPS* slow_path = new (GetScopedAllocator()) NullCheckSlowPathMIPS(instruction); - AddSlowPath(slow_path); - - Location obj = instruction->GetLocations()->InAt(0); - - __ Beqz(obj.AsRegister<Register>(), slow_path->GetEntryLabel()); -} - -void InstructionCodeGeneratorMIPS::VisitNullCheck(HNullCheck* instruction) { - codegen_->GenerateNullCheck(instruction); -} - -void LocationsBuilderMIPS::VisitOr(HOr* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS::VisitOr(HOr* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS::VisitParallelMove(HParallelMove* instruction) { - if (instruction->GetNext()->IsSuspendCheck() && - instruction->GetBlock()->GetLoopInformation() != nullptr) { - HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); - // The back edge will generate the suspend check. - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); - } - - codegen_->GetMoveResolver()->EmitNativeCode(instruction); -} - -void LocationsBuilderMIPS::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); - if (location.IsStackSlot()) { - location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } else if (location.IsDoubleStackSlot()) { - location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } - locations->SetOut(location); -} - -void InstructionCodeGeneratorMIPS::VisitParameterValue(HParameterValue* instruction - ATTRIBUTE_UNUSED) { - // Nothing to do, the parameter is already at its location. -} - -void LocationsBuilderMIPS::VisitCurrentMethod(HCurrentMethod* instruction) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); -} - -void InstructionCodeGeneratorMIPS::VisitCurrentMethod(HCurrentMethod* instruction - ATTRIBUTE_UNUSED) { - // Nothing to do, the method is already at its location. -} - -void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - locations->SetInAt(i, Location::Any()); - } - locations->SetOut(Location::Any()); -} - -void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void LocationsBuilderMIPS::VisitRem(HRem* rem) { - DataType::Type type = rem->GetResultType(); - bool call_rem; - if ((type == DataType::Type::kInt64) && rem->InputAt(1)->IsConstant()) { - int64_t imm = CodeGenerator::GetInt64ValueOf(rem->InputAt(1)->AsConstant()); - call_rem = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); - } else { - call_rem = (type != DataType::Type::kInt32); - } - LocationSummary::CallKind call_kind = call_rem - ? 
LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); - - switch (type) { - case DataType::Type::kInt32: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kInt64: { - if (call_rem) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); - locations->SetOut(Location::RequiresRegister()); - } - break; - } - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(type)); - break; - } - - default: - LOG(FATAL) << "Unexpected rem type " << type; - } -} - -void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - GenerateDivRemIntegral(instruction); - break; - case DataType::Type::kInt64: { - if (locations->InAt(1).IsConstant()) { - int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
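// Illustrative sketch (plain C++) of the constant-divisor cases handled in this chain:
// rem by +/-1 is always zero, and rem by a power of two needs no division -- mask the
// magnitude and give the result the dividend's sign, matching Java's % semantics.
#include <cstdint>

inline int64_t RemByOneOrMinusOne(int64_t /* dividend */) { return 0; }

inline int64_t RemByPowerOfTwo(int64_t dividend, uint64_t abs_divisor /* a power of two */) {
  uint64_t mask = abs_divisor - 1u;
  uint64_t magnitude = dividend < 0
      ? static_cast<uint64_t>(0) - static_cast<uint64_t>(dividend)
      : static_cast<uint64_t>(dividend);
  uint64_t rem = magnitude & mask;
  return dividend < 0 ? -static_cast<int64_t>(rem) : static_cast<int64_t>(rem);
}
// Example: RemByPowerOfTwo(-13, 8) yields -5, as Java's -13 % 8 does.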
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else { - DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); - DivRemByPowerOfTwo(instruction); - } - } else { - codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); - } - break; - } - case DataType::Type::kFloat32: { - codegen_->InvokeRuntime(kQuickFmodf, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickFmodf, float, float, float>(); - break; - } - case DataType::Type::kFloat64: { - codegen_->InvokeRuntime(kQuickFmod, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickFmod, double, double, double>(); - break; - } - default: - LOG(FATAL) << "Unexpected rem type " << type; - } -} - -static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { - LocationSummary* locations = new (allocator) LocationSummary(minmax); - switch (minmax->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, - bool is_min, - bool isR6, - DataType::Type type) { - if (isR6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. 
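// Illustrative sketch (plain C++) of the SELEQZ/SELNEZ idiom described above: each select
// yields either its input or zero depending on the condition, so OR-ing the pair produces
// the chosen value without a branch and without conditionally-skipped moves.
#include <cstdint>

inline uint32_t SelEqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0u; }  // SELEQZ rd, rs, rt
inline uint32_t SelNez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0u; }  // SELNEZ rd, rs, rt

inline int32_t MinViaSelects(int32_t a, int32_t b) {
  uint32_t b_lt_a = (b < a) ? 1u : 0u;                         // Slt AT, b, a
  uint32_t keep_a = SelEqz(static_cast<uint32_t>(a), b_lt_a);  // a when !(b < a), else 0
  uint32_t keep_b = SelNez(static_cast<uint32_t>(b), b_lt_a);  // b when  (b < a), else 0
  return static_cast<int32_t>(keep_a | keep_b);                // one operand is always zero
}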
- if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { // !isR6 - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, - bool is_min, - bool isR6, - DataType::Type type) { - FRegister out 
= locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (isR6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - - } else { // !isR6 - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - DataType::Type type = minmax->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << type; - } -} - -void LocationsBuilderMIPS::VisitMin(HMin* min) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), min); -} - -void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { - GenerateMinMax(min, /*is_min*/ true); -} - -void LocationsBuilderMIPS::VisitMax(HMax* max) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), max); -} - -void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { - GenerateMinMax(max, /*is_min*/ false); -} - -void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); - switch (abs->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, - DataType::Type type, - bool isR2OrNewer, - bool isR6) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (type == DataType::Type::kFloat64) { - __ AbsD(out, in); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ AbsS(out, in); - } - } else { - if (type == DataType::Type::kFloat64) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
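// Illustrative sketch (plain C++) of the two ways the sign bit is cleared for abs() here:
// a single-bit insert of zero at bit 31 where the INS instruction exists (R2 and newer),
// or a shift-up/shift-down pair on R1. Both leave every other bit of the value untouched,
// which is the required Math.abs() behavior even for NaN payloads.
#include <cstdint>

inline uint32_t ClearSignBitWithInsert(uint32_t bits) {  // __ Ins(TMP, ZERO, 31, 1)
  return bits & 0x7FFFFFFFu;
}

inline uint32_t ClearSignBitWithShifts(uint32_t bits) {  // __ Sll(TMP, TMP, 1); __ Srl(TMP, TMP, 1)
  return (bits << 1) >> 1;
}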
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { - LocationSummary* locations = abs->GetLocations(); - bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - switch (abs->GetResultType()) { - case DataType::Type::kInt32: { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - break; - } - case DataType::Type::kInt64: { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); - break; - default: - LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); - } -} - -void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { - constructor_fence->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { - GenerateMemoryBarrier(MemBarrierKind::kStoreStore); -} - -void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - memory_barrier->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); -} - -void LocationsBuilderMIPS::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret); - DataType::Type return_type = ret->InputAt(0)->GetType(); - locations->SetInAt(0, MipsReturnLocation(return_type)); -} - -void InstructionCodeGeneratorMIPS::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { - codegen_->GenerateFrameExit(); -} - -void LocationsBuilderMIPS::VisitReturnVoid(HReturnVoid* ret) { - ret->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { - codegen_->GenerateFrameExit(); -} - -void LocationsBuilderMIPS::VisitRor(HRor* ror) { - HandleShift(ror); -} - -void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) { - HandleShift(ror); -} - -void LocationsBuilderMIPS::VisitShl(HShl* shl) { - HandleShift(shl); -} - -void InstructionCodeGeneratorMIPS::VisitShl(HShl* shl) { - HandleShift(shl); -} - -void LocationsBuilderMIPS::VisitShr(HShr* shr) { - HandleShift(shr); -} - -void InstructionCodeGeneratorMIPS::VisitShr(HShr* shr) { - HandleShift(shr); -} - -void LocationsBuilderMIPS::VisitSub(HSub* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS::VisitSub(HSub* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS::VisitStaticFieldGet(HStaticFieldGet* 
instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS::VisitStaticFieldGet(HStaticFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); -} - -void LocationsBuilderMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, - instruction->GetFieldInfo(), - instruction->GetDexPc(), - instruction->GetValueCanBeNull()); -} - -void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldGet( - HUnresolvedInstanceFieldGet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary(instruction, - instruction->GetFieldType(), - calling_convention); -} - -void InstructionCodeGeneratorMIPS::VisitUnresolvedInstanceFieldGet( - HUnresolvedInstanceFieldGet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldSet( - HUnresolvedInstanceFieldSet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary(instruction, - instruction->GetFieldType(), - calling_convention); -} - -void InstructionCodeGeneratorMIPS::VisitUnresolvedInstanceFieldSet( - HUnresolvedInstanceFieldSet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS::VisitUnresolvedStaticFieldGet( - HUnresolvedStaticFieldGet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary(instruction, - instruction->GetFieldType(), - calling_convention); -} - -void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldGet( - HUnresolvedStaticFieldGet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS::VisitUnresolvedStaticFieldSet( - HUnresolvedStaticFieldSet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary(instruction, - instruction->GetFieldType(), - calling_convention); -} - -void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet( - HUnresolvedStaticFieldSet* instruction) { - FieldAccessCallingConventionMIPS calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnSlowPath); - // In suspend check slow path, usually there are no caller-save registers at all. 
- // If SIMD instructions are present, however, we force spilling all live SIMD - // registers in full width (since the runtime only saves/restores lower part). - locations->SetCustomSlowPathCallerSaves( - GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); -} - -void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { - HBasicBlock* block = instruction->GetBlock(); - if (block->GetLoopInformation() != nullptr) { - DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); - // The back edge will generate the suspend check. - return; - } - if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { - // The goto will generate the suspend check. - return; - } - GenerateSuspendCheck(instruction, nullptr); -} - -void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void InstructionCodeGeneratorMIPS::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); -} - -void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { - DataType::Type input_type = conversion->GetInputType(); - DataType::Type result_type = conversion->GetResultType(); - DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) - << input_type << " -> " << result_type; - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || - (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { - LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - } - - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if (!isR6 && - ((DataType::IsFloatingPointType(result_type) && input_type == DataType::Type::kInt64) || - (result_type == DataType::Type::kInt64 && DataType::IsFloatingPointType(input_type)))) { - call_kind = LocationSummary::kCallOnMainOnly; - } - - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); - - if (call_kind == LocationSummary::kNoCall) { - if (DataType::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (DataType::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - } else { - InvokeRuntimeCallingConvention calling_convention; - - if (DataType::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - } else { - DCHECK_EQ(input_type, DataType::Type::kInt64); - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - } - - locations->SetOut(calling_convention.GetReturnLocation(result_type)); - } -} - -void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversion) { - LocationSummary* locations = conversion->GetLocations(); - DataType::Type 
result_type = conversion->GetResultType(); - DataType::Type input_type = conversion->GetInputType(); - bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) - << input_type << " -> " << result_type; - - if (result_type == DataType::Type::kInt64 && DataType::IsIntegralType(input_type)) { - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - Register src = locations->InAt(0).AsRegister<Register>(); - - if (dst_low != src) { - __ Move(dst_low, src); - } - __ Sra(dst_high, src, 31); - } else if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { - Register dst = locations->Out().AsRegister<Register>(); - Register src = (input_type == DataType::Type::kInt64) - ? locations->InAt(0).AsRegisterPairLow<Register>() - : locations->InAt(0).AsRegister<Register>(); - - switch (result_type) { - case DataType::Type::kUint8: - __ Andi(dst, src, 0xFF); - break; - case DataType::Type::kInt8: - if (has_sign_extension) { - __ Seb(dst, src); - } else { - __ Sll(dst, src, 24); - __ Sra(dst, dst, 24); - } - break; - case DataType::Type::kUint16: - __ Andi(dst, src, 0xFFFF); - break; - case DataType::Type::kInt16: - if (has_sign_extension) { - __ Seh(dst, src); - } else { - __ Sll(dst, src, 16); - __ Sra(dst, dst, 16); - } - break; - case DataType::Type::kInt32: - if (dst != src) { - __ Move(dst, src); - } - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { - if (input_type == DataType::Type::kInt64) { - if (isR6) { - // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary - // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. - Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - __ Mtc1(src_low, FTMP); - __ Mthc1(src_high, FTMP); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsl(dst, FTMP); - } else { - __ Cvtdl(dst, FTMP); - } - } else { - QuickEntrypointEnum entrypoint = - (result_type == DataType::Type::kFloat32) ? kQuickL2f : kQuickL2d; - codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); - if (result_type == DataType::Type::kFloat32) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } - } - } else { - Register src = locations->InAt(0).AsRegister<Register>(); - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - __ Mtc1(src, FTMP); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsw(dst, FTMP); - } else { - __ Cvtdw(dst, FTMP); - } - } - } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { - CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); - - // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum - // value of the output type if the input is outside of the range after the truncation or - // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct - // results. 
This matches the desired float/double-to-int/long conversion exactly. - // - // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive - // value when the input is either a NaN or is outside of the range of the output type - // after the truncation. IOW, the three special cases (NaN, too small, too big) produce - // the same result. - // - // The code takes care of the different behaviors by first comparing the input to the - // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int). - // If the input is greater than or equal to the minimum, it procedes to the truncate - // instruction, which will handle such an input the same way irrespective of NAN2008. - // Otherwise the input is compared to itself to determine whether it is a NaN or not - // in order to return either zero or the minimum value. - if (result_type == DataType::Type::kInt64) { - if (isR6) { - // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary - // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. - FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - - if (input_type == DataType::Type::kFloat32) { - __ TruncLS(FTMP, src); - } else { - __ TruncLD(FTMP, src); - } - __ Mfc1(dst_low, FTMP); - __ Mfhc1(dst_high, FTMP); - } else { - QuickEntrypointEnum entrypoint = - (input_type == DataType::Type::kFloat32) ? kQuickF2l : kQuickD2l; - codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); - if (input_type == DataType::Type::kFloat32) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); - } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); - } - } - } else { - FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - Register dst = locations->Out().AsRegister<Register>(); - MipsLabel truncate; - MipsLabel done; - - if (!isR6) { - if (input_type == DataType::Type::kFloat32) { - uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); - __ LoadConst32(TMP, min_val); - __ Mtc1(TMP, FTMP); - } else { - uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); - __ LoadConst32(TMP, High32Bits(min_val)); - __ Mtc1(ZERO, FTMP); - __ MoveToFpuHigh(TMP, FTMP); - } - - if (input_type == DataType::Type::kFloat32) { - __ ColeS(0, FTMP, src); - } else { - __ ColeD(0, FTMP, src); - } - __ Bc1t(0, &truncate); - - if (input_type == DataType::Type::kFloat32) { - __ CeqS(0, src, src); - } else { - __ CeqD(0, src, src); - } - __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); - __ Movf(dst, ZERO, 0); - - __ B(&done); - - __ Bind(&truncate); - } - - if (input_type == DataType::Type::kFloat32) { - __ TruncWS(FTMP, src); - } else { - __ TruncWD(FTMP, src); - } - __ Mfc1(dst, FTMP); - - if (!isR6) { - __ Bind(&done); - } - } - } else if (DataType::IsFloatingPointType(result_type) && - DataType::IsFloatingPointType(input_type)) { - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); - FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsd(dst, src); - } else { - __ Cvtds(dst, src); - } - } else { - LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type - << " to " << result_type; - } -} - -void LocationsBuilderMIPS::VisitUShr(HUShr* ushr) { - HandleShift(ushr); -} - -void 
InstructionCodeGeneratorMIPS::VisitUShr(HUShr* ushr) { - HandleShift(ushr); -} - -void LocationsBuilderMIPS::VisitXor(HXor* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS::VisitXor(HXor* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, this should be removed during prepare for register allocator. - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, this should be removed during prepare for register allocator. - LOG(FATAL) << "Unreachable"; -} - -void LocationsBuilderMIPS::VisitEqual(HEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitBelow(HBelow* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitAbove(HAbove* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (!codegen_->GetInstructionSetFeatures().IsR6()) { - uint32_t num_entries = switch_instr->GetNumEntries(); - if (num_entries > InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { - // When there's no HMipsComputeBaseMethodAddress input, R2 uses the NAL - // instruction to simulate PC-relative addressing when accessing the jump table. - // NAL clobbers RA. Make sure RA is preserved. 
- codegen_->ClobberRA(); - } - } -} - -void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block) { - // Create a set of compare/jumps. - Register temp_reg = TMP; - __ Addiu32(temp_reg, value_reg, -lower_bound); - // Jump to default if index is negative - // Note: We don't check the case that index is positive while value < lower_bound, because in - // this case, index >= num_entries must be true. So that we can save one branch instruction. - __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); - - const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); - // Jump to successors[0] if value == lower_bound. - __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); - int32_t last_index = 0; - for (; num_entries - last_index > 2; last_index += 2) { - __ Addiu(temp_reg, temp_reg, -2); - // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. - __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); - // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. - __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); - } - if (num_entries - last_index == 2) { - // The last missing case_value. - __ Addiu(temp_reg, temp_reg, -1); - __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); - } - - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_block, default_block)) { - __ B(codegen_->GetLabelOf(default_block)); - } -} - -void InstructionCodeGeneratorMIPS::GenTableBasedPackedSwitch(Register value_reg, - Register constant_area, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block) { - // Create a jump table. - std::vector<MipsLabel*> labels(num_entries); - const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - labels[i] = codegen_->GetLabelOf(successors[i]); - } - JumpTable* table = __ CreateJumpTable(std::move(labels)); - - // Is the value in range? - __ Addiu32(TMP, value_reg, -lower_bound); - if (IsInt<16>(static_cast<int32_t>(num_entries))) { - __ Sltiu(AT, TMP, num_entries); - __ Beqz(AT, codegen_->GetLabelOf(default_block)); - } else { - __ LoadConst32(AT, num_entries); - __ Bgeu(TMP, AT, codegen_->GetLabelOf(default_block)); - } - - // We are in the range of the table. - // Load the target address from the jump table, indexing by the value. - __ LoadLabelAddress(AT, constant_area, table->GetLabel()); - __ ShiftAndAdd(TMP, TMP, AT, 2, TMP); - __ Lw(TMP, TMP, 0); - // Compute the absolute target address by adding the table start address - // (the table contains offsets to targets relative to its start). - __ Addu(TMP, TMP, AT); - // And jump. - __ Jr(TMP); - __ NopIfNoReordering(); -} - -void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - uint32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* switch_block = switch_instr->GetBlock(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - - if (num_entries > kPackedSwitchJumpTableThreshold) { - // R6 uses PC-relative addressing to access the jump table. 
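Setting the R2/R6 addressing details aside for a moment, the two lowering strategies chosen between here can be modeled in ordinary C++. The sketch below is illustrative only (function names invented, successor indexes stand in for branch targets): the first function mirrors the compare/branch chain of GenPackedSwitchWithCompares above, the second the relative-offset jump table of GenTableBasedPackedSwitch.

#include <cstdint>
#include <vector>

// Compare/branch chain: bias the value by -lower_bound, go to the default block
// on underflow, then test the remaining cases two at a time with one
// subtraction per pair. Returns a successor index, or -1 for the default block.
int32_t SelectWithCompares(int32_t value, int32_t lower_bound, int32_t num_entries) {
  int32_t biased = value - lower_bound;            // Addiu32(TMP, value, -lower_bound)
  if (biased < 0) return -1;                       // Bltz -> default block
  if (biased == 0) return 0;                       // Beqz -> successors[0]
  int32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    biased -= 2;                                   // Addiu(TMP, TMP, -2)
    if (biased < 0) return last_index + 1;         // Bltz -> successors[last_index + 1]
    if (biased == 0) return last_index + 2;        // Beqz -> successors[last_index + 2]
  }
  if (num_entries - last_index == 2) {
    biased -= 1;                                   // The last missing case value.
    if (biased == 0) return last_index + 1;
  }
  return -1;                                       // Fall through to the default block.
}

// Jump table: the table stores offsets relative to its own start, so the target
// is table_base + table[biased], guarded by a single unsigned range check.
intptr_t SelectWithTable(int32_t value, int32_t lower_bound,
                         const std::vector<int32_t>& offsets,  // relative to table_base
                         intptr_t table_base, intptr_t default_target) {
  uint32_t biased = static_cast<uint32_t>(value - lower_bound);  // Addiu32
  if (biased >= offsets.size()) return default_target;           // Sltiu / Bgeu
  return table_base + offsets[biased];                           // Lw, Addu, Jr
}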
- // - // R2, OTOH, uses an HMipsComputeBaseMethodAddress input (when available) - // to access the jump table and it is implemented by changing HPackedSwitch to - // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress (see - // VisitMipsPackedSwitch()). - // - // When there's no HMipsComputeBaseMethodAddress input (e.g. in presence of - // irreducible loops), R2 uses the NAL instruction to simulate PC-relative - // addressing. - GenTableBasedPackedSwitch(value_reg, - ZERO, - lower_bound, - num_entries, - switch_block, - default_block); - } else { - GenPackedSwitchWithCompares(value_reg, - lower_bound, - num_entries, - switch_block, - default_block); - } -} - -void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - // Constant area pointer (HMipsComputeBaseMethodAddress). - locations->SetInAt(1, Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - uint32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - Register constant_area = locations->InAt(1).AsRegister<Register>(); - HBasicBlock* switch_block = switch_instr->GetBlock(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - - // This is an R2-only path. HPackedSwitch has been changed to - // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress - // required to address the jump table relative to PC. - GenTableBasedPackedSwitch(value_reg, - constant_area, - lower_bound, - num_entries, - switch_block, - default_block); -} - -void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( - HMipsComputeBaseMethodAddress* insn) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( - HMipsComputeBaseMethodAddress* insn) { - LocationSummary* locations = insn->GetLocations(); - Register reg = locations->Out().AsRegister<Register>(); - - CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); - - // Generate a dummy PC-relative call to obtain PC. - __ Nal(); - // Grab the return address off RA. - __ Move(reg, RA); - - // Remember this offset (the obtained PC value) for later use with constant area. - __ BindPcRelBaseLabel(); -} - -void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { - // The trampoline uses the same calling convention as dex calling conventions, - // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain - // the method_idx. 
- HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { - codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); -} - -void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { - uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( - instruction->GetIndex(), kMipsPointerSize).SizeValue(); - __ LoadFromOffset(kLoadWord, - locations->Out().AsRegister<Register>(), - locations->InAt(0).AsRegister<Register>(), - method_offset); - } else { - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kMipsPointerSize)); - __ LoadFromOffset(kLoadWord, - locations->Out().AsRegister<Register>(), - locations->InAt(0).AsRegister<Register>(), - mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); - __ LoadFromOffset(kLoadWord, - locations->Out().AsRegister<Register>(), - locations->Out().AsRegister<Register>(), - method_offset); - } -} - -void LocationsBuilderMIPS::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -#undef __ -#undef QUICK_ENTRY_POINT - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h deleted file mode 100644 index 50807310b6..0000000000 --- a/compiler/optimizing/code_generator_mips.h +++ /dev/null @@ -1,732 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ - -#include "code_generator.h" -#include "dex/dex_file_types.h" -#include "dex/string_reference.h" -#include "dex/type_reference.h" -#include "driver/compiler_options.h" -#include "nodes.h" -#include "parallel_move_resolver.h" -#include "utils/mips/assembler_mips.h" - -namespace art { -namespace mips { - -// InvokeDexCallingConvention registers - -static constexpr Register kParameterCoreRegisters[] = - { A1, A2, A3, T0, T1 }; -static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - -static constexpr FRegister kParameterFpuRegisters[] = - { F8, F10, F12, F14, F16, F18 }; -static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); - - -// InvokeRuntimeCallingConvention registers - -static constexpr Register kRuntimeParameterCoreRegisters[] = - { A0, A1, A2, A3 }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); - -static constexpr FRegister kRuntimeParameterFpuRegisters[] = - { F12, F14 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); - - -static constexpr Register kCoreCalleeSaves[] = - { S0, S1, S2, S3, S4, S5, S6, S7, FP, RA }; -static constexpr FRegister kFpuCalleeSaves[] = - { F20, F22, F24, F26, F28, F30 }; - - -class CodeGeneratorMIPS; - -VectorRegister VectorRegisterFrom(Location location); - -class InvokeDexCallingConvention : public CallingConvention<Register, FRegister> { - public: - InvokeDexCallingConvention() - : CallingConvention(kParameterCoreRegisters, - kParameterCoreRegistersLength, - kParameterFpuRegisters, - kParameterFpuRegistersLength, - kMipsPointerSize) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); -}; - -class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionVisitor { - public: - InvokeDexCallingConventionVisitorMIPS() {} - virtual ~InvokeDexCallingConventionVisitorMIPS() {} - - Location GetNextLocation(DataType::Type type) override; - Location GetReturnLocation(DataType::Type type) const override; - Location GetMethodLocation() const override; - - private: - InvokeDexCallingConvention calling_convention; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorMIPS); -}; - -class InvokeRuntimeCallingConvention : public CallingConvention<Register, FRegister> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength, - kMipsPointerSize) {} - - Location GetReturnLocation(DataType::Type return_type); - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - -class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention { - public: - FieldAccessCallingConventionMIPS() {} - - Location GetObjectLocation() const override { - return Location::RegisterLocation(A1); - } - Location GetFieldIndexLocation() const override { - return Location::RegisterLocation(A0); - } - Location GetReturnLocation(DataType::Type type) const override { - return DataType::Is64BitType(type) - ? Location::RegisterPairLocation(V0, V1) - : Location::RegisterLocation(V0); - } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { - return DataType::Is64BitType(type) - ? Location::RegisterPairLocation(A2, A3) - : (is_instance ? 
Location::RegisterLocation(A2) : Location::RegisterLocation(A1)); - } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { - return Location::FpuRegisterLocation(F0); - } - - private: - DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionMIPS); -}; - -class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { - public: - ParallelMoveResolverMIPS(ArenaAllocator* allocator, CodeGeneratorMIPS* codegen) - : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - - void EmitMove(size_t index) override; - void EmitSwap(size_t index) override; - void SpillScratch(int reg) override; - void RestoreScratch(int reg) override; - - void Exchange(int index1, int index2, bool double_slot); - void ExchangeQuadSlots(int index1, int index2); - - MipsAssembler* GetAssembler() const; - - private: - CodeGeneratorMIPS* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverMIPS); -}; - -class SlowPathCodeMIPS : public SlowPathCode { - public: - explicit SlowPathCodeMIPS(HInstruction* instruction) - : SlowPathCode(instruction), entry_label_(), exit_label_() {} - - MipsLabel* GetEntryLabel() { return &entry_label_; } - MipsLabel* GetExitLabel() { return &exit_label_; } - - private: - MipsLabel entry_label_; - MipsLabel exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS); -}; - -class LocationsBuilderMIPS : public HGraphVisitor { - public: - LocationsBuilderMIPS(HGraph* graph, CodeGeneratorMIPS* codegen) - : HGraphVisitor(graph), codegen_(codegen) {} - -#define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) override; - - FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) - -#undef DECLARE_VISIT_INSTRUCTION - - void VisitInstruction(HInstruction* instruction) override { - LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() - << " (id " << instruction->GetId() << ")"; - } - - private: - void HandleInvoke(HInvoke* invoke); - void HandleBinaryOp(HBinaryOperation* operation); - void HandleCondition(HCondition* instruction); - void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); - void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); - Location RegisterOrZeroConstant(HInstruction* instruction); - Location FpuRegisterOrConstantForStore(HInstruction* instruction); - - InvokeDexCallingConventionVisitorMIPS parameter_visitor_; - - CodeGeneratorMIPS* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS); -}; - -class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { - public: - InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); - -#define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) override; - - FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) - -#undef DECLARE_VISIT_INSTRUCTION - - void VisitInstruction(HInstruction* instruction) override { - LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() - << " (id " << instruction->GetId() << ")"; - } - - MipsAssembler* GetAssembler() const { return assembler_; } - - // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit - // instructions for N cases. - // Table-based packed switch generates approx. 11 32-bit instructions - // and N 32-bit data words for N cases. 
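Plugging small values of N into those two estimates shows where they cross; a quick standalone check of the arithmetic (illustrative snippet, not part of the deleted code):

// ~3 + 2.5*N words for the compare/branch chain, ~11 instructions plus N table
// words for the table-based form.
constexpr double CompareBasedWords(int n) { return 3 + 2.5 * n; }
constexpr int TableBasedWords(int n) { return 11 + n; }
static_assert(CompareBasedWords(6) == 18.0 && TableBasedWords(6) == 17,
              "18 vs 17 words at N = 6");
static_assert(CompareBasedWords(7) > TableBasedWords(7),
              "the table-based form stays ahead from 7 cases on");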
- // At N = 6 they come out as 18 and 17 32-bit words respectively. - // We switch to the table-based method starting with 7 cases. - static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; - - void GenerateMemoryBarrier(MemBarrierKind kind); - - private: - void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); - void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); - void HandleBinaryOp(HBinaryOperation* operation); - void HandleCondition(HCondition* instruction); - void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, - const FieldInfo& field_info, - uint32_t dex_pc, - bool value_can_be_null); - void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); - - void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); - void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); - void GenerateMinMax(HBinaryOperation*, bool is_min); - void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); - - // Generate a heap reference load using one register `out`: - // - // out <- *(out + offset) - // - // while honoring heap poisoning and/or read barriers (if any). - // - // Location `maybe_temp` is used when generating a read barrier and - // shall be a register in that case; it may be an invalid location - // otherwise. - void GenerateReferenceLoadOneRegister(HInstruction* instruction, - Location out, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option); - // Generate a heap reference load using two different registers - // `out` and `obj`: - // - // out <- *(obj + offset) - // - // while honoring heap poisoning and/or read barriers (if any). - // - // Location `maybe_temp` is used when generating a Baker's (fast - // path) read barrier and shall be a register in that case; it may - // be an invalid location otherwise. - void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, - Location out, - Location obj, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option); - - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers (if any). - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - MipsLabel* label_low = nullptr); - - void GenerateIntCompare(IfCondition cond, LocationSummary* locations); - // When the function returns `false` it means that the condition holds if `dst` is non-zero - // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero - // `dst` are exchanged. - bool MaterializeIntCompare(IfCondition cond, - LocationSummary* input_locations, - Register dst); - void GenerateIntCompareAndBranch(IfCondition cond, - LocationSummary* locations, - MipsLabel* label); - void GenerateLongCompare(IfCondition cond, LocationSummary* locations); - void GenerateLongCompareAndBranch(IfCondition cond, - LocationSummary* locations, - MipsLabel* label); - void GenerateFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations); - // When the function returns `false` it means that the condition holds if the condition - // code flag `cc` is non-zero and doesn't hold if `cc` is zero. 
If it returns `true`, - // the roles of zero and non-zero values of the `cc` flag are exchanged. - bool MaterializeFpCompareR2(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - int cc); - // When the function returns `false` it means that the condition holds if `dst` is non-zero - // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero - // `dst` are exchanged. - bool MaterializeFpCompareR6(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - FRegister dst); - void GenerateFpCompareAndBranch(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations, - MipsLabel* label); - void GenerateTestAndBranch(HInstruction* instruction, - size_t condition_input_index, - MipsLabel* true_target, - MipsLabel* false_target); - void DivRemOneOrMinusOne(HBinaryOperation* instruction); - void DivRemByPowerOfTwo(HBinaryOperation* instruction); - void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); - void GenerateDivRemIntegral(HBinaryOperation* instruction); - void HandleGoto(HInstruction* got, HBasicBlock* successor); - void GenPackedSwitchWithCompares(Register value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block); - void GenTableBasedPackedSwitch(Register value_reg, - Register constant_area, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block); - - int32_t VecAddress(LocationSummary* locations, - size_t size, - /* out */ Register* adjusted_base); - void GenConditionalMoveR2(HSelect* select); - void GenConditionalMoveR6(HSelect* select); - - MipsAssembler* const assembler_; - CodeGeneratorMIPS* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorMIPS); -}; - -class CodeGeneratorMIPS : public CodeGenerator { - public: - CodeGeneratorMIPS(HGraph* graph, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats = nullptr); - virtual ~CodeGeneratorMIPS() {} - - void ComputeSpillMask() override; - bool HasAllocatedCalleeSaveRegisters() const override; - void GenerateFrameEntry() override; - void GenerateFrameExit() override; - - void Bind(HBasicBlock* block) override; - - void MoveConstant(Location location, HConstant* c); - - size_t GetWordSize() const override { return kMipsWordSize; } - - size_t GetFloatingPointSpillSlotSize() const override { - return GetGraph()->HasSIMD() - ? 2 * kMipsDoublewordSize // 16 bytes for each spill. - : 1 * kMipsDoublewordSize; // 8 bytes for each spill. - } - - uintptr_t GetAddressOf(HBasicBlock* block) override { - return assembler_.GetLabelLocation(GetLabelOf(block)); - } - - HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - MipsAssembler* GetAssembler() override { return &assembler_; } - const MipsAssembler& GetAssembler() const override { return assembler_; } - - // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; - - // Fast path implementation of ReadBarrier::Barrier for a heap - // reference field load when Baker's read barriers are used. 
- void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location temp, - bool needs_null_check); - // Fast path implementation of ReadBarrier::Barrier for a heap - // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t data_offset, - Location index, - Location temp, - bool needs_null_check); - - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check, - bool always_update_field = false); - - // Generate a read barrier for a heap reference within `instruction` - // using a slow path. - // - // A read barrier for an object reference read from the heap is - // implemented as a call to the artReadBarrierSlow runtime entry - // point, which is passed the values in locations `ref`, `obj`, and - // `offset`: - // - // mirror::Object* artReadBarrierSlow(mirror::Object* ref, - // mirror::Object* obj, - // uint32_t offset); - // - // The `out` location contains the value returned by - // artReadBarrierSlow. - // - // When `index` is provided (i.e. for array accesses), the offset - // value passed to artReadBarrierSlow is adjusted to take `index` - // into account. - void GenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap - // reference using a slow path. If heap poisoning is enabled, also - // unpoison the reference in `out`. - void MaybeGenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction` using - // a slow path. - // - // A read barrier for an object reference GC root is implemented as - // a call to the artReadBarrierForRootSlow runtime entry point, - // which is passed the value in location `root`: - // - // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); - // - // The `out` location contains the value returned by - // artReadBarrierForRootSlow. - void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - - void MarkGCCard(Register object, Register value, bool value_can_be_null); - - // Register allocation. 
- - void SetupBlockedRegisters() const override; - - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - void ClobberRA() { - clobbered_ra_ = true; - } - - void DumpCoreRegister(std::ostream& stream, int reg) const override; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - - InstructionSet GetInstructionSet() const override { return InstructionSet::kMips; } - - const MipsInstructionSetFeatures& GetInstructionSetFeatures() const; - - MipsLabel* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<MipsLabel>(block_labels_, block); - } - - void Initialize() override { - block_labels_ = CommonInitializeLabels<MipsLabel>(); - } - - void Finalize(CodeAllocator* allocator) override; - - // Code generation helpers. - - void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - - void MoveConstant(Location destination, int32_t value) override; - - void AddLocationAsTemp(Location location, LocationSummary* locations) override; - - // Generate code to invoke a runtime entry point. - void InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) override; - - // Generate code to invoke a runtime entry point, but do not record - // PC-related information in a stack map. - void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, - HInstruction* instruction, - SlowPathCode* slow_path, - bool direct); - - void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct); - - ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - - bool NeedsTwoRegisters(DataType::Type type) const override { - return type == DataType::Type::kInt64; - } - - // Check if the desired_string_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) override; - - // Check if the desired_class_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) override; - - // Check if the desired_dispatch_info is supported. If it is, return it, - // otherwise return a fall-back info that should be used instead. 
- HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method) override; - - void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - - void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) override { - UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; - } - - void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) override; - void GenerateExplicitNullCheck(HNullCheck* instruction) override; - - // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, - // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. - // - // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating - // two patches/infos. There can be more than two patches/infos if the instruction supplying - // the high half is shared with e.g. a slow path, while the low half is supplied by separate - // instructions, e.g.: - // lui r1, high // patch - // addu r1, r1, rbase - // lw r2, low(r1) // patch - // beqz r2, slow_path - // back: - // ... - // slow_path: - // ... - // sw r2, low(r1) // patch - // b back - struct PcRelativePatchInfo : PatchInfo<MipsLabel> { - PcRelativePatchInfo(const DexFile* dex_file, - uint32_t off_or_idx, - const PcRelativePatchInfo* info_high) - : PatchInfo<MipsLabel>(dex_file, off_or_idx), - pc_rel_label(), - patch_info_high(info_high) { } - - // Label for the instruction corresponding to PC+0. Not bound or used in low half patches. - // Not bound in high half patches on R2 when using HMipsComputeBaseMethodAddress. - // Bound in high half patches on R2 when using the NAL instruction instead of - // HMipsComputeBaseMethodAddress. - // Bound in high half patches on R6. - MipsLabel pc_rel_label; - // Pointer to the info for the high half patch or nullptr if this is the high half patch info. 
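The reason the offset needs two coordinated patches is the usual MIPS hi/lo split: the low half ends up in a sign-extended 16-bit immediate (the lw/sw above), so the high half has to be biased by 0x8000 before shifting. A standalone sketch of that split (an illustration under that assumption, not ART's actual patching code):

#include <cstdint>

struct HiLoHalves {
  uint16_t hi;  // immediate for the lui supplying the high half
  uint16_t lo;  // immediate for the lw/sw/addiu supplying the low half
};

HiLoHalves SplitPcRelativeOffset(uint32_t offset) {
  HiLoHalves halves;
  halves.lo = static_cast<uint16_t>(offset & 0xFFFFu);
  // The +0x8000 compensates for sign extension of the low half:
  // (hi << 16) + int16_t(lo) reassembles the original 32-bit offset.
  halves.hi = static_cast<uint16_t>((offset + 0x8000u) >> 16);
  return halves;
}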
- const PcRelativePatchInfo* patch_info_high; - - private: - PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete; - DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); - }; - - PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); - Literal* DeduplicateBootImageAddressLiteral(uint32_t address); - - void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, - Register out, - Register base); - - void LoadBootImageAddress(Register reg, uint32_t boot_image_reference); - void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - - // The JitPatchInfo is used for JIT string and class loads. - struct JitPatchInfo { - JitPatchInfo(const DexFile& dex_file, uint64_t idx) - : target_dex_file(dex_file), index(idx) { } - JitPatchInfo(JitPatchInfo&& other) = default; - - const DexFile& target_dex_file; - // String/type index. - uint64_t index; - // Label for the instruction loading the most significant half of the address. - MipsLabel high_label; - // Label for the instruction supplying the least significant half of the address. - MipsLabel low_label; - }; - - void PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - const JitPatchInfo& info, - uint64_t index_in_table) const; - JitPatchInfo* NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - Handle<mirror::String> handle); - JitPatchInfo* NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex type_index, - Handle<mirror::Class> handle); - - private: - Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); - - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; - - Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, - uint32_t offset_or_index, - const PcRelativePatchInfo* info_high, - ArenaDeque<PcRelativePatchInfo>* patches); - - template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> - void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<linker::LinkerPatch>* linker_patches); - - // Labels for each block that will be compiled. 
- MipsLabel* block_labels_; - MipsLabel frame_entry_label_; - LocationsBuilderMIPS location_builder_; - InstructionCodeGeneratorMIPS instruction_visitor_; - ParallelMoveResolverMIPS move_resolver_; - MipsAssembler assembler_; - - // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). - ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; - // PC-relative method patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; - // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; - // PC-relative type patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; - // PC-relative String patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; - - // Patches for string root accesses in JIT compiled code. - ArenaDeque<JitPatchInfo> jit_string_patches_; - // Patches for class root accesses in JIT compiled code. - ArenaDeque<JitPatchInfo> jit_class_patches_; - - // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods. - // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations. - bool clobbered_ra_; - - DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc deleted file mode 100644 index 0d3cb3b8ca..0000000000 --- a/compiler/optimizing/code_generator_mips64.cc +++ /dev/null @@ -1,7633 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "code_generator_mips64.h" - -#include "arch/mips64/asm_support_mips64.h" -#include "art_method.h" -#include "class_table.h" -#include "code_generator_utils.h" -#include "compiled_method.h" -#include "entrypoints/quick/quick_entrypoints.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "gc/accounting/card_table.h" -#include "gc/space/image_space.h" -#include "heap_poisoning.h" -#include "intrinsics.h" -#include "intrinsics_mips64.h" -#include "linker/linker_patch.h" -#include "mirror/array-inl.h" -#include "mirror/class-inl.h" -#include "offsets.h" -#include "stack_map_stream.h" -#include "thread.h" -#include "utils/assembler.h" -#include "utils/mips64/assembler_mips64.h" -#include "utils/stack_checks.h" - -namespace art { -namespace mips64 { - -static constexpr int kCurrentMethodStackOffset = 0; -static constexpr GpuRegister kMethodRegisterArgument = A0; - -// Flags controlling the use of thunks for Baker read barriers. -constexpr bool kBakerReadBarrierThunksEnableForFields = true; -constexpr bool kBakerReadBarrierThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; - -Location Mips64ReturnLocation(DataType::Type return_type) { - switch (return_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kUint32: - case DataType::Type::kInt32: - case DataType::Type::kReference: - case DataType::Type::kUint64: - case DataType::Type::kInt64: - return Location::RegisterLocation(V0); - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - return Location::FpuRegisterLocation(F0); - - case DataType::Type::kVoid: - return Location(); - } - UNREACHABLE(); -} - -Location InvokeDexCallingConventionVisitorMIPS64::GetReturnLocation(DataType::Type type) const { - return Mips64ReturnLocation(type); -} - -Location InvokeDexCallingConventionVisitorMIPS64::GetMethodLocation() const { - return Location::RegisterLocation(kMethodRegisterArgument); -} - -Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(DataType::Type type) { - Location next_location; - if (type == DataType::Type::kVoid) { - LOG(FATAL) << "Unexpected parameter type " << type; - } - - if (DataType::IsFloatingPointType(type) && - (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { - next_location = Location::FpuRegisterLocation( - calling_convention.GetFpuRegisterAt(float_index_++)); - gp_index_++; - } else if (!DataType::IsFloatingPointType(type) && - (gp_index_ < calling_convention.GetNumberOfRegisters())) { - next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); - float_index_++; - } else { - size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) - : Location::StackSlot(stack_offset); - } - - // Space on the stack is reserved for all arguments. - stack_index_ += DataType::Is64BitType(type) ? 2 : 1; - - return next_location; -} - -Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) { - return Mips64ReturnLocation(type); -} - -static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { - InvokeRuntimeCallingConvention calling_convention; - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - // The reference is returned in the same register. 
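Stepping back to GetNextLocation() above: the detail worth noting is that gp_index_ and float_index_ advance together, so a floating-point argument also consumes the matching core register and an integer argument the matching FPU register. A rough standalone model of that rule (register counts and slot names below are assumptions of the sketch, not taken from this file):

#include <cstdint>
#include <string>

struct ArgLocationModel {
  uint32_t gp_index = 0;
  uint32_t float_index = 0;
  uint32_t stack_index = 0;
  static constexpr uint32_t kNumCoreArgRegs = 7;  // assumed register count
  static constexpr uint32_t kNumFpuArgRegs = 7;   // assumed register count

  std::string Next(bool is_fp, bool is_64bit) {
    std::string loc;
    if (is_fp && float_index < kNumFpuArgRegs) {
      loc = "fpu_arg" + std::to_string(float_index++);
      gp_index++;                                 // An FP arg also consumes a core slot.
    } else if (!is_fp && gp_index < kNumCoreArgRegs) {
      loc = "core_arg" + std::to_string(gp_index++);
      float_index++;                              // An int arg also consumes an FPU slot.
    } else {
      loc = "stack_slot" + std::to_string(stack_index);
    }
    stack_index += is_64bit ? 2 : 1;              // Stack space is reserved for every argument.
    return loc;
  }
};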
This differs from the standard return location. - return caller_saves; -} - -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT -#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() - -class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. - SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - // We're moving two locations to locations that could overlap, so we need a parallel - // move resolver. - InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kInt32, - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kInt32); - QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() - ? kQuickThrowStringBounds - : kQuickThrowArrayBounds; - mips64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); - CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); -}; - -class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) - : SlowPathCodeMIPS64(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); -}; - -class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at) - : SlowPathCodeMIPS64(at), cls_(cls) { - DCHECK(at->IsLoadClass() || at->IsClinitCheck()); - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - Location out = locations->Out(); - const uint32_t dex_pc = instruction_->GetDexPc(); - bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); - bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); - - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; 
- if (must_resolve_type) { - DCHECK(IsSameDexFile(cls_->GetDexFile(), mips64_codegen->GetGraph()->GetDexFile())); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - mips64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); - // If we also must_do_clinit, the resolved type is now in the correct register. - } else { - DCHECK(must_do_clinit); - Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); - mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - source, - cls_->GetType()); - } - if (must_do_clinit) { - mips64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); - } - - // Move the class to the desired location. - if (out.IsValid()) { - DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - DataType::Type type = instruction_->GetType(); - mips64_codegen->MoveLocation(out, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - type); - } - RestoreLiveRegisters(codegen, locations); - - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { return "LoadClassSlowPathMIPS64"; } - - private: - // The class this slow path will load. - HLoadClass* const cls_; - - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); -}; - -class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit LoadStringSlowPathMIPS64(HLoadString* instruction) - : SlowPathCodeMIPS64(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - DCHECK(instruction_->IsLoadString()); - DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); - mips64_codegen->InvokeRuntime(kQuickResolveString, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - - DataType::Type type = instruction_->GetType(); - mips64_codegen->MoveLocation(locations->Out(), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - type); - RestoreLiveRegisters(codegen, locations); - - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { return "LoadStringSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); -}; - -class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - if (instruction_->CanThrowIntoCatchBlock()) { - // Live registers will be restored in the catch block if caught. 
- SaveLiveRegisters(codegen, instruction_->GetLocations()); - } - mips64_codegen->InvokeRuntime(kQuickThrowNullPointer, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); - } - - bool IsFatal() const override { return true; } - - const char* GetDescription() const override { return "NullCheckSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); -}; - -class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) - : SlowPathCodeMIPS64(instruction), successor_(successor) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. - mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. - if (successor_ == nullptr) { - __ Bc(GetReturnLabel()); - } else { - __ Bc(mips64_codegen->GetLabelOf(successor_)); - } - } - - Mips64Label* GetReturnLabel() { - DCHECK(successor_ == nullptr); - return &return_label_; - } - - const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS64"; } - - HBasicBlock* GetSuccessor() const { - return successor_; - } - - private: - // If not null, the block to branch to after the suspend check. - HBasicBlock* const successor_; - - // If `successor_` is null, the label to branch to after the suspend check. - Mips64Label return_label_; - - DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); -}; - -class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal) - : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - - uint32_t dex_pc = instruction_->GetDexPc(); - DCHECK(instruction_->IsCheckCast() - || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - - __ Bind(GetEntryLabel()); - if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { - SaveLiveRegisters(codegen, locations); - } - - // We're moving two locations to locations that could overlap, so we need a parallel - // move resolver. 
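The reason this path (like the bounds-check slow path earlier) goes through EmitParallelMoves instead of two ordinary moves is the overlap hazard: if the first source currently lives in the register the second move writes, sequential moves clobber it. A tiny standalone illustration in plain C++ (nothing MIPS-specific about it):

#include <cassert>
#include <utility>

int main() {
  // in0 currently lives in "r1", in1 lives in "r0"; the calling convention
  // wants in0 in r0 and in1 in r1.
  int r0 = 7;  // holds in1
  int r1 = 9;  // holds in0

  // Naive sequential moves lose one of the values.
  int naive_r0 = r0, naive_r1 = r1;
  naive_r0 = naive_r1;   // r0 <- in0, but in1 is gone now
  naive_r1 = naive_r0;   // r1 <- in0 again: wrong
  assert(naive_r0 == 9 && naive_r1 == 9);

  // A parallel-move resolver detects the cycle and swaps (or routes via a temp).
  std::swap(r0, r1);
  assert(r0 == 9 && r1 == 7);
  return 0;
}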
- InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kReference); - if (instruction_->IsInstanceOf()) { - mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); - DataType::Type ret_type = instruction_->GetType(); - Location ret_loc = calling_convention.GetReturnLocation(ret_type); - mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - } else { - DCHECK(instruction_->IsCheckCast()); - mips64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); - } - - if (!is_fatal_) { - RestoreLiveRegisters(codegen, locations); - __ Bc(GetExitLabel()); - } - } - - const char* GetDescription() const override { return "TypeCheckSlowPathMIPS64"; } - - bool IsFatal() const override { return is_fatal_; } - - private: - const bool is_fatal_; - - DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64); -}; - -class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) - : SlowPathCodeMIPS64(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - __ Bind(GetEntryLabel()); - LocationSummary* locations = instruction_->GetLocations(); - SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; - __ LoadConst32(calling_convention.GetRegisterAt(0), - static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); - mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); - } - - const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); -}; - -class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); - parallel_move.AddMove( - locations->InAt(0), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - nullptr); - parallel_move.AddMove( - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kInt32, - nullptr); - parallel_move.AddMove( - locations->InAt(2), - Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - DataType::Type::kReference, - nullptr); - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); -
RestoreLiveRegisters(codegen, locations); - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { return "ArraySetSlowPathMIPS64"; } - - private: - DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking (see -// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is for the GcRoot read barrier. -class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsLoadClass() || - instruction_->IsLoadString() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - DCHECK((V0 <= ref_reg && ref_reg <= T2) || - (S2 <= ref_reg && ref_reg <= S7) || - (ref_reg == S8)) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in A0 and V0 respectively): - // - // A0 <- ref - // V0 <- ReadBarrierMark(A0) - // ref <- V0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9); - __ Jalr(entrypoint_.AsRegister<GpuRegister>()); - __ Nop(); - } else { - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); - // This runtime call does not require a stack map. 
- mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, - instruction_, - this); - } - __ Bc(GetExitLabel()); - } - - private: - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if already loaded. - const Location entrypoint_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64); -}; - -// Slow path marking an object reference `ref` during a read barrier, -// and if needed, atomically updating the field `obj.field` in the -// object `obj` holding this reference after marking (contrary to -// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update -// `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). -class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction, - Location ref, - GpuRegister obj, - Location field_offset, - GpuRegister temp1) - : SlowPathCodeMIPS64(instruction), - ref_(ref), - obj_(obj), - field_offset_(field_offset), - temp1_(temp1) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const override { - return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64"; - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - // This slow path is only used by the UnsafeCASObject intrinsic. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK(field_offset_.IsRegister()) << field_offset_; - - __ Bind(GetEntryLabel()); - - // Save the old reference. - // Note that we cannot use AT or TMP to save the old reference, as those - // are used by the code that follows, but we need the old reference after - // the call to the ReadBarrierMarkRegX entry point. - DCHECK_NE(temp1_, AT); - DCHECK_NE(temp1_, TMP); - __ Move(temp1_, ref_reg); - - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - DCHECK((V0 <= ref_reg && ref_reg <= T2) || - (S2 <= ref_reg && ref_reg <= S7) || - (ref_reg == S8)) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in A0 and V0 respectively): - // - // A0 <- ref - // V0 <- ReadBarrierMark(A0) - // ref <- V0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); - // This runtime call does not require a stack map. 
- mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, - instruction_, - this); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset_)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // the compare-and-set (CAS) loop below would abort, leaving the - // field as-is. - Mips64Label done; - __ Beqc(temp1_, ref_reg, &done); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - // Convenience aliases. - GpuRegister base = obj_; - GpuRegister offset = field_offset_.AsRegister<GpuRegister>(); - GpuRegister expected = temp1_; - GpuRegister value = ref_reg; - GpuRegister tmp_ptr = TMP; // Pointer to actual memory. - GpuRegister tmp = AT; // Value in memory. - - __ Daddu(tmp_ptr, base, offset); - - if (kPoisonHeapReferences) { - __ PoisonHeapReference(expected); - // Do not poison `value` if it is the same register as - // `expected`, which has just been poisoned. - if (value != expected) { - __ PoisonHeapReference(value); - } - } - - // do { - // tmp = [r_ptr] - expected; - // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - - Mips64Label loop_head, exit_loop; - __ Bind(&loop_head); - __ Ll(tmp, tmp_ptr); - // The LL instruction sign-extends the 32-bit value, but - // 32-bit references must be zero-extended. Zero-extend `tmp`. - __ Dext(tmp, tmp, 0, 32); - __ Bnec(tmp, expected, &exit_loop); - __ Move(tmp, value); - __ Sc(tmp, tmp_ptr); - __ Beqzc(tmp, &loop_head); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - __ UnpoisonHeapReference(expected); - // Do not unpoison `value` if it is the same register as - // `expected`, which has just been unpoisoned. - if (value != expected) { - __ UnpoisonHeapReference(value); - } - } - - __ Bind(&done); - __ Bc(GetExitLabel()); - } - - private: - // The location (register) of the marked object reference. - const Location ref_; - // The register containing the object holding the marked object reference field. - const GpuRegister obj_; - // The location of the offset of the marked reference field within `obj_`. - Location field_offset_; - - const GpuRegister temp1_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64); -}; - -// Slow path generating a read barrier for a heap reference. -class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) - : SlowPathCodeMIPS64(instruction), - out_(out), - ref_(ref), - obj_(obj), - offset_(offset), - index_(index) { - DCHECK(kEmitCompilerReadBarrier); - // If `obj` is equal to `out` or `ref`, it means the initial object - // has been overwritten by (or after) the heap object reference load - // to be instrumented, e.g.: - // - // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); - // - // In that case, we have lost the information about the original - // object, and the emitted read barrier cannot work properly. 
- DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; - DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; - } - - void EmitNativeCode(CodeGenerator* codegen) override { - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - LocationSummary* locations = instruction_->GetLocations(); - DataType::Type type = DataType::Type::kReference; - GpuRegister reg_out = out_.AsRegister<GpuRegister>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier for heap reference slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - // We may have to change the index's value, but as `index_` is a - // constant member (like other "inputs" of this slow path), - // introduce a copy of it, `index`. - Location index = index_; - if (index_.IsValid()) { - // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. - if (instruction_->IsArrayGet()) { - // Compute the actual memory offset and store it in `index`. - GpuRegister index_reg = index_.AsRegister<GpuRegister>(); - DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); - if (codegen->IsCoreCalleeSaveRegister(index_reg)) { - // We are about to change the value of `index_reg` (see the - // calls to art::mips64::Mips64Assembler::Sll and - // art::mips64::MipsAssembler::Addiu32 below), but it has - // not been saved by the previous call to - // art::SlowPathCode::SaveLiveRegisters, as it is a - // callee-save register -- - // art::SlowPathCode::SaveLiveRegisters does not consider - // callee-save registers, as it has been designed with the - // assumption that callee-save registers are supposed to be - // handled by the called function. So, as a callee-save - // register, `index_reg` _would_ eventually be saved onto - // the stack, but it would be too late: we would have - // changed its value earlier. Therefore, we manually save - // it here into another freely available register, - // `free_reg`, chosen of course among the caller-save - // registers (as a callee-save `free_reg` register would - // exhibit the same problem). - // - // Note we could have requested a temporary register from - // the register allocator instead; but we prefer not to, as - // this is a slow path, and we know we can find a - // caller-save register that is available. - GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen); - __ Move(free_reg, index_reg); - index_reg = free_reg; - index = Location::RegisterLocation(index_reg); - } else { - // The initial register stored in `index_` has already been - // saved in the call to art::SlowPathCode::SaveLiveRegisters - // (as it is not a callee-save register), so we can freely - // use it. - } - // Shifting the index value contained in `index_reg` by the scale - // factor (2) cannot overflow in practice, as the runtime is - // unable to allocate object arrays with a size larger than - // 2^26 - 1 (that is, 2^28 - 4 bytes). 
- __ Sll(index_reg, index_reg, TIMES_4); - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - __ Addiu32(index_reg, index_reg, offset_); - } else { - // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile - // intrinsics, `index_` is not shifted by a scale factor of 2 - // (as in the case of ArrayGet), as it is actually an offset - // to an object field within an object. - DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) - << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); - DCHECK(index_.IsRegister()); - } - } - - // We're moving two or three locations to locations that could - // overlap, so we need a parallel move resolver. - InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); - parallel_move.AddMove(ref_, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - DataType::Type::kReference, - nullptr); - parallel_move.AddMove(obj_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - DataType::Type::kReference, - nullptr); - if (index.IsValid()) { - parallel_move.AddMove(index, - Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - DataType::Type::kInt32, - nullptr); - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - } else { - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); - __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); - } - mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes< - kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); - mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); - - RestoreLiveRegisters(codegen, locations); - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { - return "ReadBarrierForHeapReferenceSlowPathMIPS64"; - } - - private: - GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { - size_t ref = static_cast<int>(ref_.AsRegister<GpuRegister>()); - size_t obj = static_cast<int>(obj_.AsRegister<GpuRegister>()); - for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { - if (i != ref && - i != obj && - !codegen->IsCoreCalleeSaveRegister(i) && - !codegen->IsBlockedCoreRegister(i)) { - return static_cast<GpuRegister>(i); - } - } - // We shall never fail to find a free caller-save register, as - // there are more than two core caller-save registers on MIPS64 - // (meaning it is possible to find one which is different from - // `ref` and `obj`). - DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); - LOG(FATAL) << "Could not find a free caller-save register"; - UNREACHABLE(); - } - - const Location out_; - const Location ref_; - const Location obj_; - const uint32_t offset_; - // An additional location containing an index to an array. - // Only used for HArrayGet and the UnsafeGetObject & - // UnsafeGetObjectVolatile intrinsics. - const Location index_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64); -}; - -// Slow path generating a read barrier for a GC root.
-class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root) - : SlowPathCodeMIPS64(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); - } - - void EmitNativeCode(CodeGenerator* codegen) override { - LocationSummary* locations = instruction_->GetLocations(); - DataType::Type type = DataType::Type::kReference; - GpuRegister reg_out = out_.AsRegister<GpuRegister>(); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier for GC root slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); - - InvokeRuntimeCallingConvention calling_convention; - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - root_, - DataType::Type::kReference); - mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, - instruction_, - instruction_->GetDexPc(), - this); - CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); - mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); - - RestoreLiveRegisters(codegen, locations); - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS64"; } - - private: - const Location out_; - const Location root_; - - DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64); -}; - -CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) - : CodeGenerator(graph, - kNumberOfGpuRegisters, - kNumberOfFpuRegisters, - /* number_of_register_pairs= */ 0, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), - compiler_options, - stats), - block_labels_(nullptr), - location_builder_(graph, this), - instruction_visitor_(graph, this), - move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), - compiler_options.GetInstructionSetFeatures()->AsMips64InstructionSetFeatures()), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint64_literals_(std::less<uint64_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(StringReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { - // Save RA (containing the return 
address) to mimic Quick. - AddAllocatedRegister(Location::RegisterLocation(RA)); -} - -#undef __ -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. -#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT -#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() - -void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { - // Ensure that we fix up branches. - __ FinalizeCode(); - - // Adjust native pc offsets in stack maps. - StackMapStream* stack_map_stream = GetStackMapStream(); - for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); - uint32_t new_position = __ GetAdjustedPosition(old_position); - DCHECK_GE(new_position, old_position); - stack_map_stream->SetStackMapNativePcOffset(i, new_position); - } - - // Adjust pc offsets for the disassembly information. - if (disasm_info_ != nullptr) { - GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); - frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); - frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); - for (auto& it : *disasm_info_->GetInstructionIntervals()) { - it.second.start = __ GetAdjustedPosition(it.second.start); - it.second.end = __ GetAdjustedPosition(it.second.end); - } - for (auto& it : *disasm_info_->GetSlowPathIntervals()) { - it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); - it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); - } - } - - CodeGenerator::Finalize(allocator); -} - -Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { - return codegen_->GetAssembler(); -} - -void ParallelMoveResolverMIPS64::EmitMove(size_t index) { - MoveOperands* move = moves_[index]; - codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); -} - -void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { - MoveOperands* move = moves_[index]; - codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); -} - -void ParallelMoveResolverMIPS64::RestoreScratch(int reg) { - // Pop reg - __ Ld(GpuRegister(reg), SP, 0); - __ DecreaseFrameSize(kMips64DoublewordSize); -} - -void ParallelMoveResolverMIPS64::SpillScratch(int reg) { - // Push reg - __ IncreaseFrameSize(kMips64DoublewordSize); - __ Sd(GpuRegister(reg), SP, 0); -} - -void ParallelMoveResolverMIPS64::Exchange(int index1, int index2, bool double_slot) { - LoadOperandType load_type = double_slot ? kLoadDoubleword : kLoadWord; - StoreOperandType store_type = double_slot ? kStoreDoubleword : kStoreWord; - // Allocate a scratch register other than TMP, if available. - // Else, spill V0 (arbitrary choice) and use it as a scratch register (it will be - // automatically unspilled when the scratch scope object is destroyed). - ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters()); - // If V0 spills onto the stack, SP-relative offsets need to be adjusted. - int stack_offset = ensure_scratch.IsSpilled() ? 
kMips64DoublewordSize : 0; - __ LoadFromOffset(load_type, - GpuRegister(ensure_scratch.GetRegister()), - SP, - index1 + stack_offset); - __ LoadFromOffset(load_type, - TMP, - SP, - index2 + stack_offset); - __ StoreToOffset(store_type, - GpuRegister(ensure_scratch.GetRegister()), - SP, - index2 + stack_offset); - __ StoreToOffset(store_type, TMP, SP, index1 + stack_offset); -} - -void ParallelMoveResolverMIPS64::ExchangeQuadSlots(int index1, int index2) { - __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, index1); - __ LoadFpuFromOffset(kLoadQuadword, FTMP2, SP, index2); - __ StoreFpuToOffset(kStoreQuadword, FTMP, SP, index2); - __ StoreFpuToOffset(kStoreQuadword, FTMP2, SP, index1); -} - -static dwarf::Reg DWARFReg(GpuRegister reg) { - return dwarf::Reg::Mips64Core(static_cast<int>(reg)); -} - -static dwarf::Reg DWARFReg(FpuRegister reg) { - return dwarf::Reg::Mips64Fp(static_cast<int>(reg)); -} - -void CodeGeneratorMIPS64::GenerateFrameEntry() { - __ Bind(&frame_entry_label_); - - if (GetCompilerOptions().CountHotnessInCompiledCode()) { - __ Lhu(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); - __ Addiu(TMP, TMP, 1); - __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); - } - - bool do_overflow_check = - FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod(); - - if (do_overflow_check) { - __ LoadFromOffset( - kLoadWord, - ZERO, - SP, - -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kMips64))); - RecordPcInfo(nullptr, 0); - } - - if (HasEmptyFrame()) { - return; - } - - // Make sure the frame size isn't unreasonably large. - if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kMips64)) { - LOG(FATAL) << "Stack frame larger than " - << GetStackOverflowReservedBytes(InstructionSet::kMips64) << " bytes"; - } - - // Spill callee-saved registers. - - uint32_t ofs = GetFrameSize(); - __ IncreaseFrameSize(ofs); - - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - GpuRegister reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - ofs -= kMips64DoublewordSize; - __ StoreToOffset(kStoreDoubleword, reg, SP, ofs); - __ cfi().RelOffset(DWARFReg(reg), ofs); - } - } - - for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { - FpuRegister reg = kFpuCalleeSaves[i]; - if (allocated_registers_.ContainsFloatingPointRegister(reg)) { - ofs -= kMips64DoublewordSize; - __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs); - __ cfi().RelOffset(DWARFReg(reg), ofs); - } - } - - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset); - } - - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. - __ StoreToOffset(kStoreWord, ZERO, SP, GetStackOffsetOfShouldDeoptimizeFlag()); - } -} - -void CodeGeneratorMIPS64::GenerateFrameExit() { - __ cfi().RememberState(); - - if (!HasEmptyFrame()) { - // Restore callee-saved registers. - - // For better instruction scheduling restore RA before other registers. 
- uint32_t ofs = GetFrameSize(); - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - GpuRegister reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - ofs -= kMips64DoublewordSize; - __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs); - __ cfi().Restore(DWARFReg(reg)); - } - } - - for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { - FpuRegister reg = kFpuCalleeSaves[i]; - if (allocated_registers_.ContainsFloatingPointRegister(reg)) { - ofs -= kMips64DoublewordSize; - __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs); - __ cfi().Restore(DWARFReg(reg)); - } - } - - __ DecreaseFrameSize(GetFrameSize()); - } - - __ Jic(RA, 0); - - __ cfi().RestoreState(); - __ cfi().DefCFAOffset(GetFrameSize()); -} - -void CodeGeneratorMIPS64::Bind(HBasicBlock* block) { - __ Bind(GetLabelOf(block)); -} - -void CodeGeneratorMIPS64::MoveLocation(Location destination, - Location source, - DataType::Type dst_type) { - if (source.Equals(destination)) { - return; - } - - // A valid move can always be inferred from the destination and source - // locations. When moving from and to a register, the argument type can be - // used to generate 32bit instead of 64bit moves. - bool unspecified_type = (dst_type == DataType::Type::kVoid); - DCHECK_EQ(unspecified_type, false); - - if (destination.IsRegister() || destination.IsFpuRegister()) { - if (unspecified_type) { - HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; - if (source.IsStackSlot() || - (src_cst != nullptr && (src_cst->IsIntConstant() - || src_cst->IsFloatConstant() - || src_cst->IsNullConstant()))) { - // For stack slots and 32bit constants, a 64bit type is appropriate. - dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32; - } else { - // If the source is a double stack slot or a 64bit constant, a 64bit - // type is appropriate. Else the source is a register, and since the - // type has not been specified, we chose a 64bit type to force a 64bit - // move. - dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; - } - } - DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || - (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); - if (source.IsStackSlot() || source.IsDoubleStackSlot()) { - // Move to GPR/FPR from stack - LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword; - if (DataType::IsFloatingPointType(dst_type)) { - __ LoadFpuFromOffset(load_type, - destination.AsFpuRegister<FpuRegister>(), - SP, - source.GetStackIndex()); - } else { - // TODO: use load_type = kLoadUnsignedWord when type == DataType::Type::kReference. 
- __ LoadFromOffset(load_type, - destination.AsRegister<GpuRegister>(), - SP, - source.GetStackIndex()); - } - } else if (source.IsSIMDStackSlot()) { - __ LoadFpuFromOffset(kLoadQuadword, - destination.AsFpuRegister<FpuRegister>(), - SP, - source.GetStackIndex()); - } else if (source.IsConstant()) { - // Move to GPR/FPR from constant - GpuRegister gpr = AT; - if (!DataType::IsFloatingPointType(dst_type)) { - gpr = destination.AsRegister<GpuRegister>(); - } - if (dst_type == DataType::Type::kInt32 || dst_type == DataType::Type::kFloat32) { - int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant()); - if (DataType::IsFloatingPointType(dst_type) && value == 0) { - gpr = ZERO; - } else { - __ LoadConst32(gpr, value); - } - } else { - int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); - if (DataType::IsFloatingPointType(dst_type) && value == 0) { - gpr = ZERO; - } else { - __ LoadConst64(gpr, value); - } - } - if (dst_type == DataType::Type::kFloat32) { - __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); - } else if (dst_type == DataType::Type::kFloat64) { - __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>()); - } - } else if (source.IsRegister()) { - if (destination.IsRegister()) { - // Move to GPR from GPR - __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>()); - } else { - DCHECK(destination.IsFpuRegister()); - if (DataType::Is64BitType(dst_type)) { - __ Dmtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); - } else { - __ Mtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); - } - } - } else if (source.IsFpuRegister()) { - if (destination.IsFpuRegister()) { - if (GetGraph()->HasSIMD()) { - __ MoveV(VectorRegisterFrom(destination), - VectorRegisterFrom(source)); - } else { - // Move to FPR from FPR - if (dst_type == DataType::Type::kFloat32) { - __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); - } else { - DCHECK_EQ(dst_type, DataType::Type::kFloat64); - __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); - } - } - } else { - DCHECK(destination.IsRegister()); - if (DataType::Is64BitType(dst_type)) { - __ Dmfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); - } else { - __ Mfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); - } - } - } - } else if (destination.IsSIMDStackSlot()) { - if (source.IsFpuRegister()) { - __ StoreFpuToOffset(kStoreQuadword, - source.AsFpuRegister<FpuRegister>(), - SP, - destination.GetStackIndex()); - } else { - DCHECK(source.IsSIMDStackSlot()); - __ LoadFpuFromOffset(kLoadQuadword, - FTMP, - SP, - source.GetStackIndex()); - __ StoreFpuToOffset(kStoreQuadword, - FTMP, - SP, - destination.GetStackIndex()); - } - } else { // The destination is not a register. It must be a stack slot. - DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); - if (source.IsRegister() || source.IsFpuRegister()) { - if (unspecified_type) { - if (source.IsRegister()) { - dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; - } else { - dst_type = - destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; - } - } - DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && - (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); - // Move to stack from GPR/FPR - StoreOperandType store_type = destination.IsStackSlot() ? 
kStoreWord : kStoreDoubleword; - if (source.IsRegister()) { - __ StoreToOffset(store_type, - source.AsRegister<GpuRegister>(), - SP, - destination.GetStackIndex()); - } else { - __ StoreFpuToOffset(store_type, - source.AsFpuRegister<FpuRegister>(), - SP, - destination.GetStackIndex()); - } - } else if (source.IsConstant()) { - // Move to stack from constant - HConstant* src_cst = source.GetConstant(); - StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword; - GpuRegister gpr = ZERO; - if (destination.IsStackSlot()) { - int32_t value = GetInt32ValueOf(src_cst->AsConstant()); - if (value != 0) { - gpr = TMP; - __ LoadConst32(gpr, value); - } - } else { - DCHECK(destination.IsDoubleStackSlot()); - int64_t value = GetInt64ValueOf(src_cst->AsConstant()); - if (value != 0) { - gpr = TMP; - __ LoadConst64(gpr, value); - } - } - __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex()); - } else { - DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); - DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); - // Move to stack from stack - if (destination.IsStackSlot()) { - __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, TMP, SP, destination.GetStackIndex()); - } else { - __ LoadFromOffset(kLoadDoubleword, TMP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreDoubleword, TMP, SP, destination.GetStackIndex()); - } - } - } -} - -void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, DataType::Type type) { - DCHECK(!loc1.IsConstant()); - DCHECK(!loc2.IsConstant()); - - if (loc1.Equals(loc2)) { - return; - } - - bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); - bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); - bool is_simd1 = loc1.IsSIMDStackSlot(); - bool is_simd2 = loc2.IsSIMDStackSlot(); - bool is_fp_reg1 = loc1.IsFpuRegister(); - bool is_fp_reg2 = loc2.IsFpuRegister(); - - if (loc2.IsRegister() && loc1.IsRegister()) { - // Swap 2 GPRs - GpuRegister r1 = loc1.AsRegister<GpuRegister>(); - GpuRegister r2 = loc2.AsRegister<GpuRegister>(); - __ Move(TMP, r2); - __ Move(r2, r1); - __ Move(r1, TMP); - } else if (is_fp_reg2 && is_fp_reg1) { - // Swap 2 FPRs - if (GetGraph()->HasSIMD()) { - __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1)); - __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2)); - __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP)); - } else { - FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); - FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) { - __ MovS(FTMP, r1); - __ MovS(r1, r2); - __ MovS(r2, FTMP); - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - __ MovD(FTMP, r1); - __ MovD(r1, r2); - __ MovD(r2, FTMP); - } - } - } else if (is_slot1 != is_slot2) { - // Swap GPR/FPR and stack slot - Location reg_loc = is_slot1 ? loc2 : loc1; - Location mem_loc = is_slot1 ? loc1 : loc2; - LoadOperandType load_type = mem_loc.IsStackSlot() ? kLoadWord : kLoadDoubleword; - StoreOperandType store_type = mem_loc.IsStackSlot() ? kStoreWord : kStoreDoubleword; - // TODO: use load_type = kLoadUnsignedWord when type == DataType::Type::kReference. 
- __ LoadFromOffset(load_type, TMP, SP, mem_loc.GetStackIndex()); - if (reg_loc.IsFpuRegister()) { - __ StoreFpuToOffset(store_type, - reg_loc.AsFpuRegister<FpuRegister>(), - SP, - mem_loc.GetStackIndex()); - if (mem_loc.IsStackSlot()) { - __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); - } else { - DCHECK(mem_loc.IsDoubleStackSlot()); - __ Dmtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); - } - } else { - __ StoreToOffset(store_type, reg_loc.AsRegister<GpuRegister>(), SP, mem_loc.GetStackIndex()); - __ Move(reg_loc.AsRegister<GpuRegister>(), TMP); - } - } else if (is_slot1 && is_slot2) { - move_resolver_.Exchange(loc1.GetStackIndex(), - loc2.GetStackIndex(), - loc1.IsDoubleStackSlot()); - } else if (is_simd1 && is_simd2) { - move_resolver_.ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); - } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) { - Location fp_reg_loc = is_fp_reg1 ? loc1 : loc2; - Location mem_loc = is_fp_reg1 ? loc2 : loc1; - __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, mem_loc.GetStackIndex()); - __ StoreFpuToOffset(kStoreQuadword, - fp_reg_loc.AsFpuRegister<FpuRegister>(), - SP, - mem_loc.GetStackIndex()); - __ MoveV(VectorRegisterFrom(fp_reg_loc), static_cast<VectorRegister>(FTMP)); - } else { - LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; - } -} - -void CodeGeneratorMIPS64::MoveConstant(Location location, int32_t value) { - DCHECK(location.IsRegister()); - __ LoadConst32(location.AsRegister<GpuRegister>(), value); -} - -void CodeGeneratorMIPS64::AddLocationAsTemp(Location location, LocationSummary* locations) { - if (location.IsRegister()) { - locations->AddTemp(location); - } else { - UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; - } -} - -void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, - GpuRegister value, - bool value_can_be_null) { - Mips64Label done; - GpuRegister card = AT; - GpuRegister temp = TMP; - if (value_can_be_null) { - __ Beqzc(value, &done); - } - // Load the address of the card table into `card`. - __ LoadFromOffset(kLoadDoubleword, - card, - TR, - Thread::CardTableOffset<kMips64PointerSize>().Int32Value()); - // Calculate the address of the card corresponding to `object`. - __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift); - __ Daddu(temp, card, temp); - // Write the `art::gc::accounting::CardTable::kCardDirty` value into the - // `object`'s card. - // - // Register `card` contains the address of the card table. Note that the card - // table's base is biased during its creation so that it always starts at an - // address whose least-significant byte is equal to `kCardDirty` (see - // art::gc::accounting::CardTable::Create). Therefore the SB instruction - // below writes the `kCardDirty` (byte) value into the `object`'s card - // (located at `card + object >> kCardShift`). - // - // This dual use of the value in register `card` (1. to calculate the location - // of the card to mark; and 2. to load the `kCardDirty` value) saves a load - // (no need to explicitly load `kCardDirty` as an immediate value). 
- __ Sb(card, temp, 0); - if (value_can_be_null) { - __ Bind(&done); - } -} - -template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> -inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( - const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<linker::LinkerPatch>* linker_patches) { - for (const PcRelativePatchInfo& info : infos) { - const DexFile* dex_file = info.target_dex_file; - size_t offset_or_index = info.offset_or_index; - DCHECK(info.label.IsBound()); - uint32_t literal_offset = __ GetLabelLocation(&info.label); - const PcRelativePatchInfo& info_high = info.patch_info_high ? *info.patch_info_high : info; - uint32_t pc_rel_offset = __ GetLabelLocation(&info_high.label); - linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); - } -} - -template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> -linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t boot_image_offset) { - DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. - return Factory(literal_offset, pc_insn_offset, boot_image_offset); -} - -void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { - DCHECK(linker_patches->empty()); - size_t size = - boot_image_method_patches_.size() + - method_bss_entry_patches_.size() + - boot_image_type_patches_.size() + - type_bss_entry_patches_.size() + - boot_image_string_patches_.size() + - string_bss_entry_patches_.size() + - boot_image_intrinsic_patches_.size(); - linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - boot_image_method_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); - } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); - DCHECK(boot_image_type_patches_.empty()); - DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); - } - EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( - method_bss_entry_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( - type_bss_entry_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( - string_bss_entry_patches_, linker_patches); - DCHECK_EQ(size, linker_patches->size()); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageIntrinsicPatch( - uint32_t intrinsic_data, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( - uint32_t boot_image_offset, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( - 
MethodReference target_method, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( - MethodReference target_method, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageTypePatch( - const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPatch( - const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageStringPatch( - const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch( - &dex_file, string_index.index_, info_high, &boot_image_string_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch( - const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); -} - -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( - const DexFile* dex_file, - uint32_t offset_or_index, - const PcRelativePatchInfo* info_high, - ArenaDeque<PcRelativePatchInfo>* patches) { - patches->emplace_back(dex_file, offset_or_index, info_high); - return &patches->back(); -} - -Literal* CodeGeneratorMIPS64::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { - return map->GetOrCreate( - value, - [this, value]() { return __ NewLiteral<uint32_t>(value); }); -} - -Literal* CodeGeneratorMIPS64::DeduplicateUint64Literal(uint64_t value) { - return uint64_literals_.GetOrCreate( - value, - [this, value]() { return __ NewLiteral<uint64_t>(value); }); -} - -Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); -} - -void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, - GpuRegister out, - PcRelativePatchInfo* info_low) { - DCHECK(!info_high->patch_info_high); - __ Bind(&info_high->label); - // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* imm16= */ 0x1234); - // A following instruction will add the sign-extended low half of the 32-bit - // offset to `out` (e.g. ld, jialc, daddiu). 
- if (info_low != nullptr) { - DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); - } -} - -void CodeGeneratorMIPS64::LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference) { - if (GetCompilerOptions().IsBootImage()) { - PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); - PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(reg, AT, /* imm16= */ 0x5678); - } else if (GetCompilerOptions().GetCompilePic()) { - PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); - PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. - __ Lwu(reg, AT, /* imm16= */ 0x5678); - } else { - DCHECK(Runtime::Current()->UseJitCompilation()); - gc::Heap* heap = Runtime::Current()->GetHeap(); - DCHECK(!heap->GetBootImageSpaces().empty()); - uintptr_t address = - reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference); - __ LoadLiteral(reg, kLoadDoubleword, DeduplicateBootImageAddressLiteral(address)); - } -} - -void CodeGeneratorMIPS64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, - uint32_t boot_image_offset) { - DCHECK(invoke->IsStatic()); - InvokeRuntimeCallingConvention calling_convention; - GpuRegister argument = calling_convention.GetRegisterAt(0); - if (GetCompilerOptions().IsBootImage()) { - DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
- MethodReference target_method = invoke->GetTargetMethod(); - dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; - PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); - PcRelativePatchInfo* info_low = - NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(argument, AT, /* imm16= */ 0x5678); - } else { - LoadBootImageAddress(argument, boot_image_offset); - } - InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); -} - -Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index, - Handle<mirror::String> handle) { - ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); - return jit_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); -} - -Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file, - dex::TypeIndex type_index, - Handle<mirror::Class> handle) { - ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); - return jit_class_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); -} - -void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - const Literal* literal, - uint64_t index_in_table) const { - uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel()); - uintptr_t address = - reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address); -} - -void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { - for (const auto& entry : jit_string_patches_) { - const StringReference& string_reference = entry.first; - Literal* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitStringRootIndex(string_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } - for (const auto& entry : jit_class_patches_) { - const TypeReference& type_reference = entry.first; - Literal* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitClassRootIndex(type_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } -} - -void CodeGeneratorMIPS64::SetupBlockedRegisters() const { - // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated. - blocked_core_registers_[ZERO] = true; - blocked_core_registers_[K0] = true; - blocked_core_registers_[K1] = true; - blocked_core_registers_[GP] = true; - blocked_core_registers_[SP] = true; - blocked_core_registers_[RA] = true; - - // AT, TMP(T8) and TMP2(T3) are used as temporary/scratch - // registers (similar to how AT is used by MIPS assemblers). - blocked_core_registers_[AT] = true; - blocked_core_registers_[TMP] = true; - blocked_core_registers_[TMP2] = true; - blocked_fpu_registers_[FTMP] = true; - - if (GetInstructionSetFeatures().HasMsa()) { - // To be used just for MSA instructions. - blocked_fpu_registers_[FTMP2] = true; - } - - // Reserve suspend and thread registers. 
- blocked_core_registers_[S0] = true; - blocked_core_registers_[TR] = true; - - // Reserve T9 for function calls - blocked_core_registers_[T9] = true; - - if (GetGraph()->IsDebuggable()) { - // Stubs do not save callee-save floating point registers. If the graph - // is debuggable, we need to deal with these registers differently. For - // now, just block them. - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } -} - -size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { - __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index); - return kMips64DoublewordSize; -} - -size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { - __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index); - return kMips64DoublewordSize; -} - -size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ StoreFpuToOffset(GetGraph()->HasSIMD() ? kStoreQuadword : kStoreDoubleword, - FpuRegister(reg_id), - SP, - stack_index); - return GetFloatingPointSpillSlotSize(); -} - -size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ LoadFpuFromOffset(GetGraph()->HasSIMD() ? kLoadQuadword : kLoadDoubleword, - FpuRegister(reg_id), - SP, - stack_index); - return GetFloatingPointSpillSlotSize(); -} - -void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << GpuRegister(reg); -} - -void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << FpuRegister(reg); -} - -const Mips64InstructionSetFeatures& CodeGeneratorMIPS64::GetInstructionSetFeatures() const { - return *GetCompilerOptions().GetInstructionSetFeatures()->AsMips64InstructionSetFeatures(); -} - -void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path) { - ValidateInvokeRuntime(entrypoint, instruction, slow_path); - GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); - if (EntrypointRequiresStackMap(entrypoint)) { - RecordPcInfo(instruction, dex_pc, slow_path); - } -} - -void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, - HInstruction* instruction, - SlowPathCode* slow_path) { - ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); - GenerateInvokeRuntime(entry_point_offset); -} - -void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { - __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); - __ Jalr(T9); - __ Nop(); -} - -void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, - GpuRegister class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); - - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ Sltiu(TMP, TMP, shifted_initialized_value); - __ Bnezc(TMP, slow_path->GetEntryLabel()); - // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
- __ Sync(0); - __ Bind(slow_path->GetExitLabel()); -} - -void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - GpuRegister temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ LoadFromOffset( - kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - if (IsUint<16>(path_to_root)) { - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - __ LoadConst32(TMP, path_to_root); - __ Xor(temp, temp, TMP); - } - // Shift out bits that do not contribute to the comparison. - __ Sll(temp, temp, 32 - mask_bits); - } -} - -void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { - __ Sync(0); // only stype 0 is supported -} - -void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruction, - HBasicBlock* successor) { - SuspendCheckSlowPathMIPS64* slow_path = - down_cast<SuspendCheckSlowPathMIPS64*>(instruction->GetSlowPath()); - - if (slow_path == nullptr) { - slow_path = - new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathMIPS64(instruction, successor); - instruction->SetSlowPath(slow_path); - codegen_->AddSlowPath(slow_path); - if (successor != nullptr) { - DCHECK(successor->IsLoopHeader()); - } - } else { - DCHECK_EQ(slow_path->GetSuccessor(), successor); - } - - __ LoadFromOffset(kLoadUnsignedHalfword, - TMP, - TR, - Thread::ThreadFlagsOffset<kMips64PointerSize>().Int32Value()); - if (successor == nullptr) { - __ Bnezc(TMP, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetReturnLabel()); - } else { - __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ Bc(slow_path->GetEntryLabel()); - // slow_path will return to GetLabelOf(successor). 
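A minimal standalone sketch of what the GenerateBitstringTypeCheckCompare sequence above computes (the caller branches to the slow path when the result is non-zero): the low mask_bits of the class status word hold the bitstring path to root, and XOR-ing with the expected path then shifting the irrelevant high bits out leaves zero exactly on a match. The helper names below are illustrative, not from the ART sources.

#include <cassert>
#include <cstdint>

// True when the low `mask_bits` of `status` equal `path_to_root`, expressed the
// way the XOR + shift-left sequence computes it (result then compared to zero).
static bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
  return ((status ^ path_to_root) << (32u - mask_bits)) == 0u;
}

int main() {
  assert(BitstringMatches(0xabcd1234u, 0x1234u, 16));   // unrelated high bits are ignored
  assert(!BitstringMatches(0xabcd1235u, 0x1234u, 16));  // mismatch in the checked bits
  assert(BitstringMatches(0xffffff42u, 0x42u, 8));
  assert(!BitstringMatches(0xffffff43u, 0x42u, 8));
  return 0;
}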
- } -} - -InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, - CodeGeneratorMIPS64* codegen) - : InstructionCodeGenerator(graph, codegen), - assembler_(codegen->GetAssembler()), - codegen_(codegen) {} - -void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { - DCHECK_EQ(instruction->InputCount(), 2U); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - DataType::Type type = instruction->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - locations->SetInAt(0, Location::RequiresRegister()); - HInstruction* right = instruction->InputAt(1); - bool can_use_imm = false; - if (right->IsConstant()) { - int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant()); - if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { - can_use_imm = IsUint<16>(imm); - } else { - DCHECK(instruction->IsAdd() || instruction->IsSub()); - bool single_use = right->GetUses().HasExactlyOneElement(); - if (instruction->IsSub()) { - if (!(type == DataType::Type::kInt32 && imm == INT32_MIN)) { - imm = -imm; - } - } - if (type == DataType::Type::kInt32) { - can_use_imm = IsInt<16>(imm) || (Low16Bits(imm) == 0) || single_use; - } else { - can_use_imm = IsInt<16>(imm) || (IsInt<32>(imm) && (Low16Bits(imm) == 0)) || single_use; - } - } - } - if (can_use_imm) - locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); - else - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; - } -} - -void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - if (instruction->IsAnd()) { - if (use_imm) - __ Andi(dst, lhs, rhs_imm); - else - __ And(dst, lhs, rhs_reg); - } else if (instruction->IsOr()) { - if (use_imm) - __ Ori(dst, lhs, rhs_imm); - else - __ Or(dst, lhs, rhs_reg); - } else if (instruction->IsXor()) { - if (use_imm) - __ Xori(dst, lhs, rhs_imm); - else - __ Xor(dst, lhs, rhs_reg); - } else if (instruction->IsAdd() || instruction->IsSub()) { - if (instruction->IsSub()) { - rhs_imm = -rhs_imm; - } - if (type == DataType::Type::kInt32) { - if (use_imm) { - if (IsInt<16>(rhs_imm)) { - __ Addiu(dst, lhs, rhs_imm); - } else { - int16_t rhs_imm_high = High16Bits(rhs_imm); - int16_t rhs_imm_low = Low16Bits(rhs_imm); - if (rhs_imm_low < 0) { - rhs_imm_high += 1; - } - __ Aui(dst, lhs, rhs_imm_high); - if (rhs_imm_low != 0) { - __ Addiu(dst, dst, rhs_imm_low); - } - } 
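The 32-bit add-immediate path above splits a constant that does not fit in 16 bits into Aui (upper half) plus Addiu (lower half); because Addiu sign-extends its 16-bit operand, the upper half is incremented whenever the lower half has its sign bit set. A minimal sketch of that decomposition (helper names are illustrative, not from the ART sources):

#include <cassert>
#include <cstdint>

// Split `imm` the way the Aui/Addiu pair consumes it.
static void SplitImm32(uint32_t imm, uint16_t* hi, uint16_t* lo) {
  *lo = static_cast<uint16_t>(imm);
  *hi = static_cast<uint16_t>(imm >> 16);
  if (static_cast<int16_t>(*lo) < 0) {
    *hi = static_cast<uint16_t>(*hi + 1u);  // compensate for Addiu's sign extension
  }
}

// What the two instructions add to the register, modulo 2^32:
// Aui contributes (hi << 16), Addiu contributes the sign-extended low half.
static uint32_t Rebuild(uint16_t hi, uint16_t lo) {
  uint32_t sext_lo = static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(lo)));
  return (static_cast<uint32_t>(hi) << 16) + sext_lo;
}

int main() {
  const uint32_t samples[] = {0u, 1u, 0x7fffu, 0x8000u, 0xffffu, 0x12345678u,
                              0x1234ffffu, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t imm : samples) {
    uint16_t hi = 0u;
    uint16_t lo = 0u;
    SplitImm32(imm, &hi, &lo);
    assert(Rebuild(hi, lo) == imm);
  }
  return 0;
}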
- } else { - if (instruction->IsAdd()) { - __ Addu(dst, lhs, rhs_reg); - } else { - DCHECK(instruction->IsSub()); - __ Subu(dst, lhs, rhs_reg); - } - } - } else { - if (use_imm) { - if (IsInt<16>(rhs_imm)) { - __ Daddiu(dst, lhs, rhs_imm); - } else if (IsInt<32>(rhs_imm)) { - int16_t rhs_imm_high = High16Bits(rhs_imm); - int16_t rhs_imm_low = Low16Bits(rhs_imm); - bool overflow_hi16 = false; - if (rhs_imm_low < 0) { - rhs_imm_high += 1; - overflow_hi16 = (rhs_imm_high == -32768); - } - __ Daui(dst, lhs, rhs_imm_high); - if (rhs_imm_low != 0) { - __ Daddiu(dst, dst, rhs_imm_low); - } - if (overflow_hi16) { - __ Dahi(dst, 1); - } - } else { - int16_t rhs_imm_low = Low16Bits(Low32Bits(rhs_imm)); - if (rhs_imm_low < 0) { - rhs_imm += (INT64_C(1) << 16); - } - int16_t rhs_imm_upper = High16Bits(Low32Bits(rhs_imm)); - if (rhs_imm_upper < 0) { - rhs_imm += (INT64_C(1) << 32); - } - int16_t rhs_imm_high = Low16Bits(High32Bits(rhs_imm)); - if (rhs_imm_high < 0) { - rhs_imm += (INT64_C(1) << 48); - } - int16_t rhs_imm_top = High16Bits(High32Bits(rhs_imm)); - GpuRegister tmp = lhs; - if (rhs_imm_low != 0) { - __ Daddiu(dst, tmp, rhs_imm_low); - tmp = dst; - } - // Dahi and Dati must use the same input and output register, so we have to initialize - // the dst register using Daddiu or Daui, even when the intermediate value is zero: - // Daui(dst, lhs, 0). - if ((rhs_imm_upper != 0) || (rhs_imm_low == 0)) { - __ Daui(dst, tmp, rhs_imm_upper); - } - if (rhs_imm_high != 0) { - __ Dahi(dst, rhs_imm_high); - } - if (rhs_imm_top != 0) { - __ Dati(dst, rhs_imm_top); - } - } - } else if (instruction->IsAdd()) { - __ Daddu(dst, lhs, rhs_reg); - } else { - DCHECK(instruction->IsSub()); - __ Dsubu(dst, lhs, rhs_reg); - } - } - } - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (instruction->IsAdd()) { - if (type == DataType::Type::kFloat32) - __ AddS(dst, lhs, rhs); - else - __ AddD(dst, lhs, rhs); - } else if (instruction->IsSub()) { - if (type == DataType::Type::kFloat32) - __ SubS(dst, lhs, rhs); - else - __ SubD(dst, lhs, rhs); - } else { - LOG(FATAL) << "Unexpected floating-point binary operation"; - } - break; - } - default: - LOG(FATAL) << "Unexpected binary operation type " << type; - } -} - -void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - DataType::Type type = instr->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - } - default: - LOG(FATAL) << "Unexpected shift type " << type; - } -} - -void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - LocationSummary* locations = instr->GetLocations(); - DataType::Type type = instr->GetType(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = 
locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - if (use_imm) { - uint32_t shift_value = rhs_imm & - (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); - - if (shift_value == 0) { - if (dst != lhs) { - __ Move(dst, lhs); - } - } else if (type == DataType::Type::kInt32) { - if (instr->IsShl()) { - __ Sll(dst, lhs, shift_value); - } else if (instr->IsShr()) { - __ Sra(dst, lhs, shift_value); - } else if (instr->IsUShr()) { - __ Srl(dst, lhs, shift_value); - } else { - __ Rotr(dst, lhs, shift_value); - } - } else { - if (shift_value < 32) { - if (instr->IsShl()) { - __ Dsll(dst, lhs, shift_value); - } else if (instr->IsShr()) { - __ Dsra(dst, lhs, shift_value); - } else if (instr->IsUShr()) { - __ Dsrl(dst, lhs, shift_value); - } else { - __ Drotr(dst, lhs, shift_value); - } - } else { - shift_value -= 32; - if (instr->IsShl()) { - __ Dsll32(dst, lhs, shift_value); - } else if (instr->IsShr()) { - __ Dsra32(dst, lhs, shift_value); - } else if (instr->IsUShr()) { - __ Dsrl32(dst, lhs, shift_value); - } else { - __ Drotr32(dst, lhs, shift_value); - } - } - } - } else { - if (type == DataType::Type::kInt32) { - if (instr->IsShl()) { - __ Sllv(dst, lhs, rhs_reg); - } else if (instr->IsShr()) { - __ Srav(dst, lhs, rhs_reg); - } else if (instr->IsUShr()) { - __ Srlv(dst, lhs, rhs_reg); - } else { - __ Rotrv(dst, lhs, rhs_reg); - } - } else { - if (instr->IsShl()) { - __ Dsllv(dst, lhs, rhs_reg); - } else if (instr->IsShr()) { - __ Dsrav(dst, lhs, rhs_reg); - } else if (instr->IsUShr()) { - __ Dsrlv(dst, lhs, rhs_reg); - } else { - __ Drotrv(dst, lhs, rhs_reg); - } - } - } - break; - } - default: - LOG(FATAL) << "Unexpected shift operation type " << type; - } -} - -void LocationsBuilderMIPS64::VisitAdd(HAdd* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitAdd(HAdd* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS64::VisitAnd(HAnd* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { - DataType::Type type = instruction->GetType(); - bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (type == DataType::Type::kReference); - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, - object_array_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (DataType::IsFloatingPointType(type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - // The output overlaps in the case of an object array get with - // read barriers enabled: we do not want the move to overwrite the - // array's location, as we need it to emit the read barrier. 
- locations->SetOut(Location::RequiresRegister(), - object_array_get_with_read_barrier - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - bool temp_needed = instruction->GetIndex()->IsConstant() - ? !kBakerReadBarrierThunksEnableForFields - : !kBakerReadBarrierThunksEnableForArrays; - if (temp_needed) { - locations->AddTemp(Location::RequiresRegister()); - } - } -} - -static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) { - auto null_checker = [codegen, instruction]() { - codegen->MaybeRecordImplicitNullCheck(instruction); - }; - return null_checker; -} - -void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location out_loc = locations->Out(); - Location index = locations->InAt(1); - uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - DataType::Type type = instruction->GetType(); - const bool maybe_compressed_char_at = mirror::kUseStringCompression && - instruction->IsStringCharAt(); - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); - } else { - __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt8: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); - } else { - __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint16: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (maybe_compressed_char_at) { - uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); - __ Dext(TMP, TMP, 0, 1); - static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - } - if (index.IsConstant()) { - int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); - if (maybe_compressed_char_at) { - Mips64Label uncompressed_load, done; - __ Bnezc(TMP, &uncompressed_load); - __ LoadFromOffset(kLoadUnsignedByte, - out, - obj, - data_offset + (const_index << TIMES_1)); - __ Bc(&done); - __ Bind(&uncompressed_load); - __ LoadFromOffset(kLoadUnsignedHalfword, - out, - obj, - data_offset + (const_index << TIMES_2)); - __ Bind(&done); - } else { - __ LoadFromOffset(kLoadUnsignedHalfword, - out, - obj, - data_offset + (const_index << TIMES_2), - null_checker); - } - } else { - GpuRegister index_reg = index.AsRegister<GpuRegister>(); - if (maybe_compressed_char_at) { - Mips64Label uncompressed_load, done; - __ 
Bnezc(TMP, &uncompressed_load); - __ Daddu(TMP, obj, index_reg); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); - __ Bc(&done); - __ Bind(&uncompressed_load); - __ Dlsa(TMP, index_reg, obj, TIMES_2); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); - __ Bind(&done); - } else { - __ Dlsa(TMP, index_reg, obj, TIMES_2); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); - } - } - break; - } - - case DataType::Type::kInt16: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt32: { - DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - LoadOperandType load_type = - (type == DataType::Type::kReference) ? kLoadUnsignedWord : kLoadWord; - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(load_type, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4); - __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kReference: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ out = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - bool temp_needed = index.IsConstant() - ? !kBakerReadBarrierThunksEnableForFields - : !kBakerReadBarrierThunksEnableForArrays; - Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); - // Note that a potential implicit null check is handled in this - // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. - DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); - if (index.IsConstant()) { - // Array load with a constant index can be treated as a field load. - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - offset, - temp, - /* needs_null_check= */ false); - } else { - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check= */ false); - } - } else { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). 
- codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4); - __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker); - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). - codegen_->MaybeGenerateReadBarrierSlow(instruction, - out_loc, - out_loc, - obj_loc, - data_offset, - index); - } - } - break; - } - - case DataType::Type::kInt64: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat32: { - FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4); - __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat64: { - FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8); - __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = instruction->GetLocations(); - uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadWord, out, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - // Mask out compression flag from String's array length. - if (mirror::kUseStringCompression && instruction->IsStringLength()) { - __ Srl(out, out, 1u); - } -} - -Location LocationsBuilderMIPS64::RegisterOrZeroConstant(HInstruction* instruction) { - return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern()) - ? Location::ConstantLocation(instruction->AsConstant()) - : Location::RequiresRegister(); -} - -Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* instruction) { - // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register. 
- // We can store a non-zero float or double constant without first loading it into the FPU, - // but we should only prefer this if the constant has a single use. - if (instruction->IsConstant() && - (instruction->AsConstant()->IsZeroBitPattern() || - instruction->GetUses().HasExactlyOneElement())) { - return Location::ConstantLocation(instruction->AsConstant()); - // Otherwise fall through and require an FPU register for the constant. - } - return Location::RequiresFpuRegister(); -} - -void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { - DataType::Type value_type = instruction->GetComponentType(); - - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); - - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (DataType::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } - if (needs_write_barrier) { - // Temporary register for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - } -} - -void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); - Location index = locations->InAt(1); - Location value_location = locations->InAt(2); - DataType::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - bool needs_write_barrier = - CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - GpuRegister base_reg = index.IsConstant() ? 
obj : TMP; - - switch (value_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; - } else { - __ Daddu(base_reg, obj, index.AsRegister<GpuRegister>()); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker); - } else { - GpuRegister value = value_location.AsRegister<GpuRegister>(); - __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint16: - case DataType::Type::kInt16: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_2); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker); - } else { - GpuRegister value = value_location.AsRegister<GpuRegister>(); - __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kInt32: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - } else { - GpuRegister value = value_location.AsRegister<GpuRegister>(); - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kReference: { - if (value_location.IsConstant()) { - // Just setting null. - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4); - } - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - DCHECK_EQ(value, 0); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call_for_type_check); - break; - } - - DCHECK(needs_write_barrier); - GpuRegister value = value_location.AsRegister<GpuRegister>(); - GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); - GpuRegister temp2 = TMP; // Doesn't need to survive slow path. 
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - Mips64Label done; - SlowPathCodeMIPS64* slow_path = nullptr; - - if (may_need_runtime_call_for_type_check) { - slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - Mips64Label non_zero; - __ Bnezc(value, &non_zero); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4); - } - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - __ Bc(&done); - __ Bind(&non_zero); - } - - // Note that when read barriers are enabled, the type checks - // are performed without read barriers. This is fine, even in - // the case where a class object is in the from-space after - // the flip, as a comparison involving such a type would not - // produce a false positive; it may of course produce a false - // negative, in which case we would take the ArraySet slow - // path. - - // /* HeapReference<Class> */ temp1 = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker); - __ MaybeUnpoisonHeapReference(temp1); - - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset); - // /* HeapReference<Class> */ temp2 = value->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset); - // If heap poisoning is enabled, no need to unpoison `temp1` - // nor `temp2`, as we are comparing two poisoned references. - - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Mips64Label do_put; - __ Beqc(temp1, temp2, &do_put); - // If heap poisoning is enabled, the `temp1` reference has - // not been unpoisoned yet; unpoison it now. - __ MaybeUnpoisonHeapReference(temp1); - - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset); - // If heap poisoning is enabled, no need to unpoison - // `temp1`, as we are comparing against null below. - __ Bnezc(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); - } else { - __ Bnec(temp1, temp2, slow_path->GetEntryLabel()); - } - } - - GpuRegister source = value; - if (kPoisonHeapReferences) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. 
- __ Move(temp1, value); - __ PoisonHeapReference(temp1); - source = temp1; - } - - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4); - } - __ StoreToOffset(kStoreWord, source, base_reg, data_offset); - - if (!may_need_runtime_call_for_type_check) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); - - if (done.IsLinked()) { - __ Bind(&done); - } - - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } - break; - } - - case DataType::Type::kInt64: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8); - } - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); - } else { - GpuRegister value = value_location.AsRegister<GpuRegister>(); - __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat32: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4); - } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - } else { - FpuRegister value = value_location.AsFpuRegister<FpuRegister>(); - __ StoreFpuToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kFloat64: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - if (index.IsConstant()) { - data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; - } else { - __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8); - } - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); - } else { - FpuRegister value = value_location.AsFpuRegister<FpuRegister>(); - __ StoreFpuToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); - } - break; - } - - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - - HInstruction* index = instruction->InputAt(0); - HInstruction* length = instruction->InputAt(1); - - bool 
const_index = false; - bool const_length = false; - - if (index->IsConstant()) { - if (length->IsConstant()) { - const_index = true; - const_length = true; - } else { - int32_t index_value = index->AsIntConstant()->GetValue(); - if (index_value < 0 || IsInt<16>(index_value + 1)) { - const_index = true; - } - } - } else if (length->IsConstant()) { - int32_t length_value = length->AsIntConstant()->GetValue(); - if (IsUint<15>(length_value)) { - const_length = true; - } - } - - locations->SetInAt(0, const_index - ? Location::ConstantLocation(index->AsConstant()) - : Location::RequiresRegister()); - locations->SetInAt(1, const_length - ? Location::ConstantLocation(length->AsConstant()) - : Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Location index_loc = locations->InAt(0); - Location length_loc = locations->InAt(1); - - if (length_loc.IsConstant()) { - int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index_loc.IsConstant()) { - int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index < 0 || index >= length) { - BoundsCheckSlowPathMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - __ Bc(slow_path->GetEntryLabel()); - } else { - // Nothing to be done. - } - return; - } - - BoundsCheckSlowPathMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - GpuRegister index = index_loc.AsRegister<GpuRegister>(); - if (length == 0) { - __ Bc(slow_path->GetEntryLabel()); - } else if (length == 1) { - __ Bnezc(index, slow_path->GetEntryLabel()); - } else { - DCHECK(IsUint<15>(length)) << length; - __ Sltiu(TMP, index, length); - __ Beqzc(TMP, slow_path->GetEntryLabel()); - } - } else { - GpuRegister length = length_loc.AsRegister<GpuRegister>(); - BoundsCheckSlowPathMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - if (index_loc.IsConstant()) { - int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); - if (index < 0) { - __ Bc(slow_path->GetEntryLabel()); - } else if (index == 0) { - __ Blezc(length, slow_path->GetEntryLabel()); - } else { - DCHECK(IsInt<16>(index + 1)) << index; - __ Sltiu(TMP, length, index + 1); - __ Bnezc(TMP, slow_path->GetEntryLabel()); - } - } else { - GpuRegister index = index_loc.AsRegister<GpuRegister>(); - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } - } -} - -// Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && - !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { - return 1; - } - return 0; -} - -// Extra temp is used for read barrier. 
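The VisitBoundsCheck code above folds the two range tests into one comparison: reinterpreting the (signed) index as unsigned makes any negative value compare greater than every valid length, so a single unsigned "index >= length" test (Bgeuc, or Sltiu plus a branch) covers both failure modes. A minimal sketch of that fold (the helper name is illustrative, not from the ART sources):

#include <cassert>
#include <cstdint>

// True when `index` must take the bounds-check slow path.
static bool IndexOutOfBounds(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
}

int main() {
  assert(IndexOutOfBounds(-1, 10));   // negative index wraps to a huge unsigned value
  assert(IndexOutOfBounds(10, 10));   // index == length is out of range
  assert(IndexOutOfBounds(0, 0));     // empty array: every index is out of range
  assert(!IndexOutOfBounds(0, 10));
  assert(!IndexOutOfBounds(9, 10));
  return 0;
}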
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { - return 1 + NumberOfInstanceOfTemps(type_check_kind); -} - -void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); -} - -void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location cls = locations->InAt(1); - Location temp_loc = locations->GetTemp(0); - GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); - DCHECK_LE(num_temps, 2u); - Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); - const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); - const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); - const uint32_t object_array_data_offset = - mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - Mips64Label done; - - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, is_type_check_slow_path_fatal); - codegen_->AddSlowPath(slow_path); - - // Avoid this check if we know `obj` is not null. - if (instruction->MustDoNullCheck()) { - __ Beqzc(obj, &done); - } - - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kArrayCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Jump to slow path for throwing the exception or doing a - // more involved array check. - __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kAbstractClassCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class is abstract, we eagerly fetch the super class of the - // object to avoid doing a comparison we know will fail. 
- Mips64Label loop; - __ Bind(&loop); - // /* HeapReference<Class> */ temp = temp->super_class_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - super_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class reference currently in `temp` is null, jump to the slow path to throw the - // exception. - __ Beqzc(temp, slow_path->GetEntryLabel()); - // Otherwise, compare the classes. - __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); - break; - } - - case TypeCheckKind::kClassHierarchyCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Walk over the class hierarchy to find a match. - Mips64Label loop; - __ Bind(&loop); - __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); - // /* HeapReference<Class> */ temp = temp->super_class_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - super_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the class reference currently in `temp` is null, jump to the slow path to throw the - // exception. Otherwise, jump to the beginning of the loop. - __ Bnezc(temp, &loop); - __ Bc(slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kArrayObjectCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Do an exact check. - __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); - // Otherwise, we need to check that the object's class is a non-primitive array. - // /* HeapReference<Class> */ temp = temp->component_type_ - GenerateReferenceLoadOneRegister(instruction, - temp_loc, - component_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // If the component type is null, jump to the slow path to throw the exception. - __ Beqzc(temp, slow_path->GetEntryLabel()); - // Otherwise, the object is indeed an array, further check that this component - // type is not a primitive type. - __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Bnezc(temp, slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kUnresolvedCheck: - // We always go into the type check slow path for the unresolved check case. - // We cannot directly call the CheckCast runtime entry point - // without resorting to a type checking slow path here (i.e. by - // calling InvokeRuntime directly), as it would require to - // assign fixed registers for the inputs of this HInstanceOf - // instruction (following the runtime calling convention), which - // might be cluttered by the potential first read barrier - // emission at the beginning of this method. - __ Bc(slow_path->GetEntryLabel()); - break; - - case TypeCheckKind::kInterfaceCheck: { - // Avoid read barriers to improve performance of the fast path. We can not get false - // positives by doing this. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - // Iftable is never null. - __ Lw(TMP, temp, array_length_offset); - // Loop through the iftable and check if any class matches. 
- Mips64Label loop; - __ Bind(&loop); - __ Beqzc(TMP, slow_path->GetEntryLabel()); - __ Lwu(AT, temp, object_array_data_offset); - __ MaybeUnpoisonHeapReference(AT); - // Go to next interface. - __ Daddiu(temp, temp, 2 * kHeapReferenceSize); - __ Addiu(TMP, TMP, -2); - // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ Bnezc(temp, slow_path->GetEntryLabel()); - break; - } - } - - __ Bind(&done); - __ Bind(slow_path->GetExitLabel()); -} - -void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); - locations->SetInAt(0, Location::RequiresRegister()); - if (check->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } - // Rely on the type initialization to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); -} - -void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { - // We assume the class is not null. - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(check->GetLoadClass(), check); - codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, - check->GetLocations()->InAt(0).AsRegister<GpuRegister>()); -} - -void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { - DataType::Type in_type = compare->InputAt(0)->GetType(); - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(compare); - - switch (in_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type for compare operation " << in_type; - } -} - -void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { - LocationSummary* locations = instruction->GetLocations(); - GpuRegister res = locations->Out().AsRegister<GpuRegister>(); - DataType::Type in_type = instruction->InputAt(0)->GetType(); - - // 0 if: left == right - // 1 if: left > right - // -1 if: left < right - switch (in_type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - bool use_imm = rhs_location.IsConstant(); - GpuRegister rhs = ZERO; - if (use_imm) { - if (in_type == DataType::Type::kInt64) { - 
int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); - if (value != 0) { - rhs = AT; - __ LoadConst64(rhs, value); - } - } else { - int32_t value = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()->AsConstant()); - if (value != 0) { - rhs = AT; - __ LoadConst32(rhs, value); - } - } - } else { - rhs = rhs_location.AsRegister<GpuRegister>(); - } - __ Slt(TMP, lhs, rhs); - __ Slt(res, rhs, lhs); - __ Subu(res, res, TMP); - break; - } - - case DataType::Type::kFloat32: { - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - Mips64Label done; - __ CmpEqS(FTMP, lhs, rhs); - __ LoadConst32(res, 0); - __ Bc1nez(FTMP, &done); - if (instruction->IsGtBias()) { - __ CmpLtS(FTMP, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, 1); - } else { - __ CmpLtS(FTMP, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, -1); - } - __ Bind(&done); - break; - } - - case DataType::Type::kFloat64: { - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - Mips64Label done; - __ CmpEqD(FTMP, lhs, rhs); - __ LoadConst32(res, 0); - __ Bc1nez(FTMP, &done); - if (instruction->IsGtBias()) { - __ CmpLtD(FTMP, lhs, rhs); - __ LoadConst32(res, -1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, 1); - } else { - __ CmpLtD(FTMP, rhs, lhs); - __ LoadConst32(res, 1); - __ Bc1nez(FTMP, &done); - __ LoadConst32(res, -1); - } - __ Bind(&done); - break; - } - - default: - LOG(FATAL) << "Unimplemented compare type " << in_type; - } -} - -void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->InputAt(0)->GetType()) { - default: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - break; - } - if (!instruction->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } -} - -void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { - if (instruction->IsEmittedAtUseSite()) { - return; - } - - DataType::Type type = instruction->InputAt(0)->GetType(); - LocationSummary* locations = instruction->GetLocations(); - switch (type) { - default: - // Integer case. 
- GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ false, locations); - return; - case DataType::Type::kInt64: - GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ true, locations); - return; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); - return; - } -} - -void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); - int64_t imm = Int64FromConstant(second.GetConstant()); - DCHECK(imm == 1 || imm == -1); - - if (instruction->IsRem()) { - __ Move(out, ZERO); - } else { - if (imm == -1) { - if (type == DataType::Type::kInt32) { - __ Subu(out, ZERO, dividend); - } else { - DCHECK_EQ(type, DataType::Type::kInt64); - __ Dsubu(out, ZERO, dividend); - } - } else if (out != dividend) { - __ Move(out, dividend); - } - } -} - -void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); - int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - if (instruction->IsDiv()) { - if (type == DataType::Type::kInt32) { - if (ctz_imm == 1) { - // Fast path for division by +/-2, which is very common. - __ Srl(TMP, dividend, 31); - } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - } - __ Addu(out, dividend, TMP); - __ Sra(out, out, ctz_imm); - if (imm < 0) { - __ Subu(out, ZERO, out); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt64); - if (ctz_imm == 1) { - // Fast path for division by +/-2, which is very common. - __ Dsrl32(TMP, dividend, 31); - } else { - __ Dsra32(TMP, dividend, 31); - if (ctz_imm > 32) { - __ Dsrl(TMP, TMP, 64 - ctz_imm); - } else { - __ Dsrl32(TMP, TMP, 32 - ctz_imm); - } - } - __ Daddu(out, dividend, TMP); - if (ctz_imm < 32) { - __ Dsra(out, out, ctz_imm); - } else { - __ Dsra32(out, out, ctz_imm - 32); - } - if (imm < 0) { - __ Dsubu(out, ZERO, out); - } - } - } else { - if (type == DataType::Type::kInt32) { - if (ctz_imm == 1) { - // Fast path for modulo +/-2, which is very common. - __ Sra(TMP, dividend, 31); - __ Subu(out, dividend, TMP); - __ Andi(out, out, 1); - __ Addu(out, out, TMP); - } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - __ Addu(out, dividend, TMP); - __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); - __ Subu(out, out, TMP); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt64); - if (ctz_imm == 1) { - // Fast path for modulo +/-2, which is very common. 
- __ Dsra32(TMP, dividend, 31); - __ Dsubu(out, dividend, TMP); - __ Andi(out, out, 1); - __ Daddu(out, out, TMP); - } else { - __ Dsra32(TMP, dividend, 31); - if (ctz_imm > 32) { - __ Dsrl(TMP, TMP, 64 - ctz_imm); - } else { - __ Dsrl32(TMP, TMP, 32 - ctz_imm); - } - __ Daddu(out, dividend, TMP); - __ DblIns(out, ZERO, ctz_imm, 64 - ctz_imm); - __ Dsubu(out, out, TMP); - } - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); - int64_t imm = Int64FromConstant(second.GetConstant()); - - DataType::Type type = instruction->GetResultType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; - - int64_t magic; - int shift; - CalculateMagicAndShiftForDivRem(imm, - (type == DataType::Type::kInt64), - &magic, - &shift); - - if (type == DataType::Type::kInt32) { - __ LoadConst32(TMP, magic); - __ MuhR6(TMP, dividend, TMP); - - if (imm > 0 && magic < 0) { - __ Addu(TMP, TMP, dividend); - } else if (imm < 0 && magic > 0) { - __ Subu(TMP, TMP, dividend); - } - - if (shift != 0) { - __ Sra(TMP, TMP, shift); - } - - if (instruction->IsDiv()) { - __ Sra(out, TMP, 31); - __ Subu(out, TMP, out); - } else { - __ Sra(AT, TMP, 31); - __ Subu(AT, TMP, AT); - __ LoadConst32(TMP, imm); - __ MulR6(TMP, AT, TMP); - __ Subu(out, dividend, TMP); - } - } else { - __ LoadConst64(TMP, magic); - __ Dmuh(TMP, dividend, TMP); - - if (imm > 0 && magic < 0) { - __ Daddu(TMP, TMP, dividend); - } else if (imm < 0 && magic > 0) { - __ Dsubu(TMP, TMP, dividend); - } - - if (shift >= 32) { - __ Dsra32(TMP, TMP, shift - 32); - } else if (shift > 0) { - __ Dsra(TMP, TMP, shift); - } - - if (instruction->IsDiv()) { - __ Dsra32(out, TMP, 31); - __ Dsubu(out, TMP, out); - } else { - __ Dsra32(AT, TMP, 31); - __ Dsubu(AT, TMP, AT); - __ LoadConst64(TMP, imm); - __ Dmul(TMP, AT, TMP); - __ Dsubu(out, dividend, TMP); - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; - - LocationSummary* locations = instruction->GetLocations(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Location second = locations->InAt(1); - - if (second.IsConstant()) { - int64_t imm = Int64FromConstant(second.GetConstant()); - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
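DivRemByPowerOfTwo above biases a negative dividend by (2^k - 1) before the arithmetic shift so that the quotient rounds toward zero, matching Java semantics; the bias is built with Sra by 31 followed by a logical shift right by (32 - k). A minimal sketch of the 32-bit, positive-divisor core of that sequence (it assumes >> on a negative int32_t is an arithmetic shift, like the Sra instruction being modelled; names are illustrative, not from the ART sources):

#include <cassert>
#include <cstdint>

// Round-toward-zero division by 2^k, mirroring the Sra/Srl/Addu/Sra sequence.
static int32_t DivByPowerOfTwo(int32_t dividend, int k) {
  int32_t sign = dividend >> 31;  // Sra: 0 for non-negative, -1 for negative
  int32_t bias = static_cast<int32_t>(static_cast<uint32_t>(sign) >> (32 - k));  // Srl: 0 or 2^k - 1
  return (dividend + bias) >> k;  // Addu + Sra
}

int main() {
  const int32_t values[] = {0, 1, -1, 7, -7, 8, -8, 1000, -1000, INT32_MAX, INT32_MIN + 1};
  for (int k = 1; k <= 30; ++k) {
    const int32_t divisor = 1 << k;
    for (int32_t v : values) {
      assert(DivByPowerOfTwo(v, k) == v / divisor);  // '/' truncates toward zero
    }
  }
  return 0;
}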
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(AbsOrMin(imm))) { - DivRemByPowerOfTwo(instruction); - } else { - DCHECK(imm <= -2 || imm >= 2); - GenerateDivRemWithAnyConstant(instruction); - } - } else { - GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister divisor = second.AsRegister<GpuRegister>(); - if (instruction->IsDiv()) { - if (type == DataType::Type::kInt32) - __ DivR6(out, dividend, divisor); - else - __ Ddiv(out, dividend, divisor); - } else { - if (type == DataType::Type::kInt32) - __ ModR6(out, dividend, divisor); - else - __ Dmod(out, dividend, divisor); - } - } -} - -void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); - switch (div->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected div type " << div->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - GenerateDivRemIntegral(instruction); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) - __ DivS(dst, lhs, rhs); - else - __ DivD(dst, lhs, rhs); - break; - } - default: - LOG(FATAL) << "Unexpected div type " << type; - } -} - -void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); - locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); -} - -void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - Location value = instruction->GetLocations()->InAt(0); - - DataType::Type type = instruction->GetType(); - - if (!DataType::IsIntegralType(type)) { - LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - UNREACHABLE(); - } - - if (value.IsConstant()) { - int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); - if (divisor == 0) { - __ Bc(slow_path->GetEntryLabel()); - } else { - // A division by a non-null constant is valid. We don't need to perform - // any check, so simply fall through. 
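Editor's note: the GenerateDivRemWithAnyConstant path above reconstructs the quotient from the high half of a widening multiply with a precomputed (magic, shift) pair. A hedged host-side sketch for one assumed divisor, 7, using the well-known reciprocal constants rather than anything produced by CalculateMagicAndShiftForDivRem:

    #include <cassert>
    #include <cstdint>

    // Quotient of n / 7 via multiply-high, mirroring the MuhR6 + Addu + Sra + Sra/Subu shape.
    // kMagic/kShift are assumed precomputed for the divisor 7 (illustration only).
    int32_t DivBy7(int32_t n) {
      const int32_t kMagic = static_cast<int32_t>(0x92492493u);
      const int kShift = 2;
      int32_t q = static_cast<int32_t>((static_cast<int64_t>(kMagic) * n) >> 32);  // MuhR6
      q += n;                                      // imm > 0 && magic < 0, so add the dividend.
      q >>= kShift;                                // Sra by shift.
      q += static_cast<int32_t>(static_cast<uint32_t>(q) >> 31);  // Add 1 if negative (Sra/Subu).
      return q;
    }

    int32_t RemBy7(int32_t n) {
      return n - DivBy7(n) * 7;  // Rem path: q * imm subtracted from the dividend.
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n) {
        assert(DivBy7(n) == n / 7);
        assert(RemBy7(n) == n % 7);
      }
      return 0;
    }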
- } - } else { - __ Beqzc(value.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - } -} - -void LocationsBuilderMIPS64::VisitDoubleConstant(HDoubleConstant* constant) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS64::VisitDoubleConstant(HDoubleConstant* cst ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS64::VisitExit(HExit* exit) { - exit->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} - -void LocationsBuilderMIPS64::VisitFloatConstant(HFloatConstant* constant) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - if (successor->IsExitBlock()) { - DCHECK(got->GetPrevious()->AlwaysThrows()); - return; // no code needed - } - - HBasicBlock* block = got->GetBlock(); - HInstruction* previous = got->GetPrevious(); - HLoopInformation* info = block->GetLoopInformation(); - - if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ Ld(AT, SP, kCurrentMethodStackOffset); - __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); - __ Addiu(TMP, TMP, 1); - __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); - } - GenerateSuspendCheck(info->GetSuspendCheck(), successor); - return; - } - if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { - GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - } - if (!codegen_->GoesToNextBlock(block, successor)) { - __ Bc(codegen_->GetLabelOf(successor)); - } -} - -void LocationsBuilderMIPS64::VisitGoto(HGoto* got) { - got->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitGoto(HGoto* got) { - HandleGoto(got, got->GetSuccessor()); -} - -void LocationsBuilderMIPS64::VisitTryBoundary(HTryBoundary* try_boundary) { - try_boundary->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary) { - HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); - if (!successor->IsExitBlock()) { - HandleGoto(try_boundary, successor); - } -} - -void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, - bool is64bit, - LocationSummary* locations) { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - if (is64bit) { - rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); - } else { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); - - switch (cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsInt<16>(-rhs_imm)) { - if (rhs_imm == 0) { - if (cond == kCondEQ) { - __ Sltiu(dst, lhs, 1); - } else { - 
__ Sltu(dst, ZERO, lhs); - } - } else { - if (is64bit) { - __ Daddiu(dst, lhs, -rhs_imm); - } else { - __ Addiu(dst, lhs, -rhs_imm); - } - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - } - } else { - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - } - break; - - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } - break; - - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm_plus_one)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm_plus_one); - if (cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; - - case kCondB: - case kCondAE: - if (use_imm && IsInt<16>(rhs_imm)) { - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; - - case kCondBE: - case kCondA: - if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. - __ Sltiu(dst, lhs, rhs_imm_plus_one); - if (cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. 
- __ Xori(dst, dst, 1); - } - } - break; - } -} - -bool InstructionCodeGeneratorMIPS64::MaterializeIntLongCompare(IfCondition cond, - bool is64bit, - LocationSummary* input_locations, - GpuRegister dst) { - GpuRegister lhs = input_locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = input_locations->InAt(1); - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - if (is64bit) { - rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); - } else { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); - - switch (cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsInt<16>(-rhs_imm)) { - if (is64bit) { - __ Daddiu(dst, lhs, -rhs_imm); - } else { - __ Addiu(dst, lhs, -rhs_imm); - } - } else if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - return (cond == kCondEQ); - - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - return (cond == kCondGE); - - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm_plus_one)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm_plus_one); - return (cond == kCondGT); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - return (cond == kCondLE); - } - - case kCondB: - case kCondAE: - if (use_imm && IsInt<16>(rhs_imm)) { - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - return (cond == kCondAE); - - case kCondBE: - case kCondA: - if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - // Note that this only works if rhs + 1 does not overflow - // to 0, hence the check above. - // Sltiu sign-extends its 16-bit immediate operand before - // the comparison and thus lets us compare directly with - // unsigned values in the ranges [0, 0x7fff] and - // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. 
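Editor's note: MIPS64 only provides set-on-less-than (slt/slti/sltu/sltiu), so the compare routines above synthesize the remaining relations by swapping operands, comparing against rhs + 1, and inverting with xori. A small host-side truth model of those rewrites; the enum is a cut-down stand-in for IfCondition and the helper is an illustration, not ART code:

    #include <cassert>
    #include <cstdint>

    enum Cond { kEq, kNe, kLt, kGe, kLe, kGt };  // Stand-in subset of IfCondition.

    // Materializes the flag using only "less than" plus an optional invert,
    // the way GenerateIntLongCompare/MaterializeIntLongCompare do.
    int32_t Materialize(Cond cond, int32_t lhs, int32_t rhs) {
      switch (cond) {
        case kEq: return (lhs ^ rhs) == 0 ? 1 : 0;       // xor, then sltiu dst, dst, 1
        case kNe: return (lhs ^ rhs) != 0 ? 1 : 0;       // xor, then sltu dst, ZERO, dst
        case kLt: return lhs < rhs ? 1 : 0;              // slt
        case kGe: return (lhs < rhs ? 1 : 0) ^ 1;        // slt, then xori 1:  >= is !(<)
        case kLe: return lhs < rhs + 1 ? 1 : 0;          // slti with rhs + 1:  <= is < rhs+1
        case kGt: return (lhs < rhs + 1 ? 1 : 0) ^ 1;    // slti rhs + 1, then xori 1
      }
      return 0;
    }

    int main() {
      for (int32_t a = -3; a <= 3; ++a) {
        for (int32_t b = -3; b <= 3; ++b) {
          assert(Materialize(kEq, a, b) == (a == b));
          assert(Materialize(kNe, a, b) == (a != b));
          assert(Materialize(kLt, a, b) == (a < b));
          assert(Materialize(kGe, a, b) == (a >= b));
          assert(Materialize(kLe, a, b) == (a <= b));
          assert(Materialize(kGt, a, b) == (a > b));
        }
      }
      return 0;
    }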
- __ Sltiu(dst, lhs, rhs_imm_plus_one); - return (cond == kCondA); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - return (cond == kCondBE); - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, - bool is64bit, - LocationSummary* locations, - Mips64Label* label) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - if (is64bit) { - rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); - } else { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - if (use_imm && rhs_imm == 0) { - switch (cond) { - case kCondEQ: - case kCondBE: // <= 0 if zero - __ Beqzc(lhs, label); - break; - case kCondNE: - case kCondA: // > 0 if non-zero - __ Bnezc(lhs, label); - break; - case kCondLT: - __ Bltzc(lhs, label); - break; - case kCondGE: - __ Bgezc(lhs, label); - break; - case kCondLE: - __ Blezc(lhs, label); - break; - case kCondGT: - __ Bgtzc(lhs, label); - break; - case kCondB: // always false - break; - case kCondAE: // always true - __ Bc(label); - break; - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); - } - switch (cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, label); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, label); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, label); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, label); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, label); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, label); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, label); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, label); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, label); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, label); - break; - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations) { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) { - switch (cond) { - case kCondEQ: - __ CmpEqS(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondNE: - __ CmpEqS(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Addiu(dst, dst, 1); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtS(FTMP, lhs, rhs); - } else { - __ CmpUltS(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeS(FTMP, lhs, rhs); - } else { - __ CmpUleS(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltS(FTMP, rhs, lhs); - } else { - __ CmpLtS(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleS(FTMP, rhs, lhs); - } else { - __ CmpLeS(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - switch (cond) { - case kCondEQ: - __ CmpEqD(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ 
Andi(dst, dst, 1); - break; - case kCondNE: - __ CmpEqD(FTMP, lhs, rhs); - __ Mfc1(dst, FTMP); - __ Addiu(dst, dst, 1); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtD(FTMP, lhs, rhs); - } else { - __ CmpUltD(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeD(FTMP, lhs, rhs); - } else { - __ CmpUleD(FTMP, lhs, rhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltD(FTMP, rhs, lhs); - } else { - __ CmpLtD(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleD(FTMP, rhs, lhs); - } else { - __ CmpLeD(FTMP, rhs, lhs); - } - __ Mfc1(dst, FTMP); - __ Andi(dst, dst, 1); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } -} - -bool InstructionCodeGeneratorMIPS64::MaterializeFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - FpuRegister dst) { - FpuRegister lhs = input_locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = input_locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) { - switch (cond) { - case kCondEQ: - __ CmpEqS(dst, lhs, rhs); - return false; - case kCondNE: - __ CmpEqS(dst, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ CmpLtS(dst, lhs, rhs); - } else { - __ CmpUltS(dst, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ CmpLeS(dst, lhs, rhs); - } else { - __ CmpUleS(dst, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CmpUltS(dst, rhs, lhs); - } else { - __ CmpLtS(dst, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CmpUleS(dst, rhs, lhs); - } else { - __ CmpLeS(dst, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - switch (cond) { - case kCondEQ: - __ CmpEqD(dst, lhs, rhs); - return false; - case kCondNE: - __ CmpEqD(dst, lhs, rhs); - return true; - case kCondLT: - if (gt_bias) { - __ CmpLtD(dst, lhs, rhs); - } else { - __ CmpUltD(dst, lhs, rhs); - } - return false; - case kCondLE: - if (gt_bias) { - __ CmpLeD(dst, lhs, rhs); - } else { - __ CmpUleD(dst, lhs, rhs); - } - return false; - case kCondGT: - if (gt_bias) { - __ CmpUltD(dst, rhs, lhs); - } else { - __ CmpLtD(dst, rhs, lhs); - } - return false; - case kCondGE: - if (gt_bias) { - __ CmpUleD(dst, rhs, lhs); - } else { - __ CmpLeD(dst, rhs, lhs); - } - return false; - default: - LOG(FATAL) << "Unexpected non-floating-point condition " << cond; - UNREACHABLE(); - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations, - Mips64Label* label) { - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) { - switch (cond) { - case kCondEQ: - __ CmpEqS(FTMP, lhs, rhs); - __ Bc1nez(FTMP, label); - break; - case kCondNE: - __ CmpEqS(FTMP, lhs, rhs); - __ Bc1eqz(FTMP, label); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtS(FTMP, lhs, rhs); - } else { - __ CmpUltS(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeS(FTMP, lhs, rhs); - } else { - __ CmpUleS(FTMP, lhs, rhs); - } - __ 
Bc1nez(FTMP, label); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltS(FTMP, rhs, lhs); - } else { - __ CmpLtS(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleS(FTMP, rhs, lhs); - } else { - __ CmpLeS(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat64); - switch (cond) { - case kCondEQ: - __ CmpEqD(FTMP, lhs, rhs); - __ Bc1nez(FTMP, label); - break; - case kCondNE: - __ CmpEqD(FTMP, lhs, rhs); - __ Bc1eqz(FTMP, label); - break; - case kCondLT: - if (gt_bias) { - __ CmpLtD(FTMP, lhs, rhs); - } else { - __ CmpUltD(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondLE: - if (gt_bias) { - __ CmpLeD(FTMP, lhs, rhs); - } else { - __ CmpUleD(FTMP, lhs, rhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGT: - if (gt_bias) { - __ CmpUltD(FTMP, rhs, lhs); - } else { - __ CmpLtD(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - case kCondGE: - if (gt_bias) { - __ CmpUleD(FTMP, rhs, lhs); - } else { - __ CmpLeD(FTMP, rhs, lhs); - } - __ Bc1nez(FTMP, label); - break; - default: - LOG(FATAL) << "Unexpected non-floating-point condition"; - UNREACHABLE(); - } - } -} - -void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, - size_t condition_input_index, - Mips64Label* true_target, - Mips64Label* false_target) { - HInstruction* cond = instruction->InputAt(condition_input_index); - - if (true_target == nullptr && false_target == nullptr) { - // Nothing to do. The code always falls through. - return; - } else if (cond->IsIntConstant()) { - // Constant condition, statically compared against "true" (integer value 1). - if (cond->AsIntConstant()->IsTrue()) { - if (true_target != nullptr) { - __ Bc(true_target); - } - } else { - DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); - if (false_target != nullptr) { - __ Bc(false_target); - } - } - return; - } - - // The following code generates these patterns: - // (1) true_target == nullptr && false_target != nullptr - // - opposite condition true => branch to false_target - // (2) true_target != nullptr && false_target == nullptr - // - condition true => branch to true_target - // (3) true_target != nullptr && false_target != nullptr - // - condition true => branch to true_target - // - branch to false_target - if (IsBooleanValueOrMaterializedCondition(cond)) { - // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(condition_input_index); - DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { - __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target); - } else { - __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); - } - } else { - // The condition instruction has not been materialized, use its inputs as - // the comparison and its condition as the branch condition. 
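Editor's note: in the floating-point compares above, the bias flag decides which way a NaN operand resolves. With gt_bias the ordered cmp.lt/cmp.le is used (NaN makes the test false, i.e. the result leans toward "greater"); otherwise the unordered cmp.ult/cmp.ule is used (NaN makes the test true). A small model of that selection, assuming IEEE semantics, for illustration only:

    #include <cassert>
    #include <cmath>

    // Models the CmpLtS (ordered) vs. CmpUltS (unordered-or-less) choice above.
    bool LessThan(float lhs, float rhs, bool gt_bias) {
      if (std::isnan(lhs) || std::isnan(rhs)) {
        // gt_bias: NaN compares as "greater", so lhs < rhs is false.
        // lt_bias: NaN compares as "less", so lhs < rhs is true.
        return !gt_bias;
      }
      return lhs < rhs;
    }

    int main() {
      const float nan = std::nanf("");
      assert(LessThan(1.0f, 2.0f, /*gt_bias=*/true));
      assert(!LessThan(nan, 2.0f, /*gt_bias=*/true));   // Ordered compare: false on NaN.
      assert(LessThan(nan, 2.0f, /*gt_bias=*/false));   // Unordered compare: true on NaN.
      return 0;
    }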
- HCondition* condition = cond->AsCondition(); - DataType::Type type = condition->InputAt(0)->GetType(); - LocationSummary* locations = cond->GetLocations(); - IfCondition if_cond = condition->GetCondition(); - Mips64Label* branch_target = true_target; - - if (true_target == nullptr) { - if_cond = condition->GetOppositeCondition(); - branch_target = false_target; - } - - switch (type) { - default: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ false, locations, branch_target); - break; - case DataType::Type::kInt64: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ true, locations, branch_target); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); - break; - } - } - - // If neither branch falls through (case 3), the conditional branch to `true_target` - // was already emitted (case 2) and we need to emit a jump to `false_target`. - if (true_target != nullptr && false_target != nullptr) { - __ Bc(false_target); - } -} - -void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); - if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { - HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); - HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? - nullptr : codegen_->GetLabelOf(true_successor); - Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? - nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); -} - -void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) - LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - InvokeRuntimeCallingConvention calling_convention; - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); - if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS64* slow_path = - deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); - GenerateTestAndBranch(deoptimize, - /* condition_input_index= */ 0, - slow_path->GetEntryLabel(), - /* false_target= */ nullptr); -} - -// This function returns true if a conditional move can be generated for HSelect. -// Otherwise it returns false and HSelect must be implemented in terms of conditonal -// branches and regular moves. -// -// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect. -// -// While determining feasibility of a conditional move and setting inputs/outputs -// are two distinct tasks, this function does both because they share quite a bit -// of common logic. 
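Editor's note: CanMoveConditionally, removed below, accepts the branchless lowering when the select fits the seleqz/selnez pattern: zero out whichever input loses and OR the survivors, or use a single seleqz/selnez when one input is the zero constant. A host-side sketch of that three-instruction idiom (an illustration of the pattern, not the HSelect code itself):

    #include <cassert>
    #include <cstdint>

    // seleqz rd, rs, rt : rd = (rt == 0) ? rs : 0
    // selnez rd, rs, rt : rd = (rt != 0) ? rs : 0
    int64_t Seleqz(int64_t rs, int64_t rt) { return rt == 0 ? rs : 0; }
    int64_t Selnez(int64_t rs, int64_t rt) { return rt != 0 ? rs : 0; }

    // The general "selnez AT, true; seleqz TMP, false; or out, AT, TMP" sequence.
    int64_t Select(int64_t cond, int64_t true_value, int64_t false_value) {
      return Selnez(true_value, cond) | Seleqz(false_value, cond);
    }

    int main() {
      assert(Select(1, 42, 7) == 42);
      assert(Select(0, 42, 7) == 7);
      // When one input is the zero constant a single seleqz/selnez suffices,
      // which is exactly what the location logic below checks for.
      assert(Seleqz(7, 0) == 7);    // Condition false: keep the false value.
      assert(Selnez(42, 1) == 42);  // Condition true: keep the true value.
      return 0;
    }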
-static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) { - bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* i= */ 2); - HCondition* condition = cond->AsCondition(); - - DataType::Type cond_type = - materialized ? DataType::Type::kInt32 : condition->InputAt(0)->GetType(); - DataType::Type dst_type = select->GetType(); - - HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); - HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); - bool is_true_value_zero_constant = - (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern()); - bool is_false_value_zero_constant = - (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern()); - - bool can_move_conditionally = false; - bool use_const_for_false_in = false; - bool use_const_for_true_in = false; - - if (!cond->IsConstant()) { - if (!DataType::IsFloatingPointType(cond_type)) { - if (!DataType::IsFloatingPointType(dst_type)) { - // Moving int/long on int/long condition. - if (is_true_value_zero_constant) { - // seleqz out_reg, false_reg, cond_reg - can_move_conditionally = true; - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // selnez out_reg, true_reg, cond_reg - can_move_conditionally = true; - use_const_for_false_in = true; - } else if (materialized) { - // Not materializing unmaterialized int conditions - // to keep the instruction count low. - // selnez AT, true_reg, cond_reg - // seleqz TMP, false_reg, cond_reg - // or out_reg, AT, TMP - can_move_conditionally = true; - } - } else { - // Moving float/double on int/long condition. - if (materialized) { - // Not materializing unmaterialized int conditions - // to keep the instruction count low. - can_move_conditionally = true; - if (is_true_value_zero_constant) { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // seleqz.fmt out_reg, false_reg, temp_cond_reg - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // selnez.fmt out_reg, true_reg, temp_cond_reg - use_const_for_false_in = true; - } else { - // sltu TMP, ZERO, cond_reg - // mtc1 TMP, temp_cond_reg - // sel.fmt temp_cond_reg, false_reg, true_reg - // mov.fmt out_reg, temp_cond_reg - } - } - } - } else { - if (!DataType::IsFloatingPointType(dst_type)) { - // Moving int/long on float/double condition. - can_move_conditionally = true; - if (is_true_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // seleqz out_reg, false_reg, TMP - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // mfc1 TMP, temp_cond_reg - // selnez out_reg, true_reg, TMP - use_const_for_false_in = true; - } else { - // mfc1 TMP, temp_cond_reg - // selnez AT, true_reg, TMP - // seleqz TMP, false_reg, TMP - // or out_reg, AT, TMP - } - } else { - // Moving float/double on float/double condition. 
- can_move_conditionally = true; - if (is_true_value_zero_constant) { - // seleqz.fmt out_reg, false_reg, temp_cond_reg - use_const_for_true_in = true; - } else if (is_false_value_zero_constant) { - // selnez.fmt out_reg, true_reg, temp_cond_reg - use_const_for_false_in = true; - } else { - // sel.fmt temp_cond_reg, false_reg, true_reg - // mov.fmt out_reg, temp_cond_reg - } - } - } - } - - if (can_move_conditionally) { - DCHECK(!use_const_for_false_in || !use_const_for_true_in); - } else { - DCHECK(!use_const_for_false_in); - DCHECK(!use_const_for_true_in); - } - - if (locations_to_set != nullptr) { - if (use_const_for_false_in) { - locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); - } else { - locations_to_set->SetInAt(0, - DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } - if (use_const_for_true_in) { - locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); - } else { - locations_to_set->SetInAt(1, - DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } - if (materialized) { - locations_to_set->SetInAt(2, Location::RequiresRegister()); - } - - if (can_move_conditionally) { - locations_to_set->SetOut(DataType::IsFloatingPointType(dst_type) - ? Location::RequiresFpuRegister() - : Location::RequiresRegister()); - } else { - locations_to_set->SetOut(Location::SameAsFirstInput()); - } - } - - return can_move_conditionally; -} - - -void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { - LocationSummary* locations = select->GetLocations(); - Location dst = locations->Out(); - Location false_src = locations->InAt(0); - Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* i= */ 2); - GpuRegister cond_reg = TMP; - FpuRegister fcond_reg = FTMP; - DataType::Type cond_type = DataType::Type::kInt32; - bool cond_inverted = false; - DataType::Type dst_type = select->GetType(); - - if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* at= */ 2).AsRegister<GpuRegister>(); - } else { - HCondition* condition = cond->AsCondition(); - LocationSummary* cond_locations = cond->GetLocations(); - IfCondition if_cond = condition->GetCondition(); - cond_type = condition->InputAt(0)->GetType(); - switch (cond_type) { - default: - cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit= */ false, - cond_locations, - cond_reg); - break; - case DataType::Type::kInt64: - cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit= */ true, - cond_locations, - cond_reg); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - cond_inverted = MaterializeFpCompare(if_cond, - condition->IsGtBias(), - cond_type, - cond_locations, - fcond_reg); - break; - } - } - - if (true_src.IsConstant()) { - DCHECK(true_src.GetConstant()->IsZeroBitPattern()); - } - if (false_src.IsConstant()) { - DCHECK(false_src.GetConstant()->IsZeroBitPattern()); - } - - switch (dst_type) { - default: - if (DataType::IsFloatingPointType(cond_type)) { - __ Mfc1(cond_reg, fcond_reg); - } - if (true_src.IsConstant()) { - if (cond_inverted) { - __ Selnez(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); - } else { - __ Seleqz(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); - } - } else if (false_src.IsConstant()) { - if (cond_inverted) { - __ Seleqz(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), 
cond_reg); - } else { - __ Selnez(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), cond_reg); - } - } else { - DCHECK_NE(cond_reg, AT); - if (cond_inverted) { - __ Seleqz(AT, true_src.AsRegister<GpuRegister>(), cond_reg); - __ Selnez(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); - } else { - __ Selnez(AT, true_src.AsRegister<GpuRegister>(), cond_reg); - __ Seleqz(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); - } - __ Or(dst.AsRegister<GpuRegister>(), AT, TMP); - } - break; - case DataType::Type::kFloat32: { - if (!DataType::IsFloatingPointType(cond_type)) { - // sel*.fmt tests bit 0 of the condition register, account for that. - __ Sltu(TMP, ZERO, cond_reg); - __ Mtc1(TMP, fcond_reg); - } - FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); - if (true_src.IsConstant()) { - FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); - if (cond_inverted) { - __ SelnezS(dst_reg, src_reg, fcond_reg); - } else { - __ SeleqzS(dst_reg, src_reg, fcond_reg); - } - } else if (false_src.IsConstant()) { - FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); - if (cond_inverted) { - __ SeleqzS(dst_reg, src_reg, fcond_reg); - } else { - __ SelnezS(dst_reg, src_reg, fcond_reg); - } - } else { - if (cond_inverted) { - __ SelS(fcond_reg, - true_src.AsFpuRegister<FpuRegister>(), - false_src.AsFpuRegister<FpuRegister>()); - } else { - __ SelS(fcond_reg, - false_src.AsFpuRegister<FpuRegister>(), - true_src.AsFpuRegister<FpuRegister>()); - } - __ MovS(dst_reg, fcond_reg); - } - break; - } - case DataType::Type::kFloat64: { - if (!DataType::IsFloatingPointType(cond_type)) { - // sel*.fmt tests bit 0 of the condition register, account for that. - __ Sltu(TMP, ZERO, cond_reg); - __ Mtc1(TMP, fcond_reg); - } - FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); - if (true_src.IsConstant()) { - FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); - if (cond_inverted) { - __ SelnezD(dst_reg, src_reg, fcond_reg); - } else { - __ SeleqzD(dst_reg, src_reg, fcond_reg); - } - } else if (false_src.IsConstant()) { - FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); - if (cond_inverted) { - __ SeleqzD(dst_reg, src_reg, fcond_reg); - } else { - __ SelnezD(dst_reg, src_reg, fcond_reg); - } - } else { - if (cond_inverted) { - __ SelD(fcond_reg, - true_src.AsFpuRegister<FpuRegister>(), - false_src.AsFpuRegister<FpuRegister>()); - } else { - __ SelD(fcond_reg, - false_src.AsFpuRegister<FpuRegister>(), - true_src.AsFpuRegister<FpuRegister>()); - } - __ MovD(dst_reg, fcond_reg); - } - break; - } - } -} - -void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) - LocationSummary(flag, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - __ LoadFromOffset(kLoadWord, - flag->GetLocations()->Out().AsRegister<GpuRegister>(), - SP, - codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); -} - -void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); - CanMoveConditionally(select, locations); -} - -void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { - if (CanMoveConditionally(select, /* locations_to_set= */ nullptr)) { - GenConditionalMove(select); - } else { - LocationSummary* locations = select->GetLocations(); - Mips64Label false_target; - 
GenerateTestAndBranch(select, - /* condition_input_index= */ 2, - /* true_target= */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); - } -} - -void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); -} - -void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. -} - -void CodeGeneratorMIPS64::GenerateNop() { - __ Nop(); -} - -void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, - const FieldInfo& field_info) { - DataType::Type field_type = field_info.GetFieldType(); - bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_type == DataType::Type::kReference); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, - object_field_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::RequiresRegister()); - if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - // The output overlaps in the case of an object field get with - // read barriers enabled: we do not want the move to overwrite the - // object's location, as we need it to emit the read barrier. - locations->SetOut(Location::RequiresRegister(), - object_field_get_with_read_barrier - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - } - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier. 
- if (!kBakerReadBarrierThunksEnableForFields) { - locations->AddTemp(Location::RequiresRegister()); - } - } -} - -void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, - const FieldInfo& field_info) { - DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location dst_loc = locations->Out(); - LoadOperandType load_type = kLoadUnsignedByte; - bool is_volatile = field_info.IsVolatile(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - load_type = kLoadUnsignedByte; - break; - case DataType::Type::kInt8: - load_type = kLoadSignedByte; - break; - case DataType::Type::kUint16: - load_type = kLoadUnsignedHalfword; - break; - case DataType::Type::kInt16: - load_type = kLoadSignedHalfword; - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - load_type = kLoadWord; - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - load_type = kLoadDoubleword; - break; - case DataType::Type::kReference: - load_type = kLoadUnsignedWord; - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } - if (!DataType::IsFloatingPointType(type)) { - DCHECK(dst_loc.IsRegister()); - GpuRegister dst = dst_loc.AsRegister<GpuRegister>(); - if (type == DataType::Type::kReference) { - // /* HeapReference<Object> */ dst = *(obj + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = - kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); - // Note that a potential implicit null check is handled in this - // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call. - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - dst_loc, - obj, - offset, - temp_loc, - /* needs_null_check= */ true); - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - } else { - __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker); - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - // If read barriers are enabled, emit read barriers other than - // Baker's using a slow path (and also unpoison the loaded - // reference, if heap poisoning is enabled). - codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); - } - } else { - __ LoadFromOffset(load_type, dst, obj, offset, null_checker); - } - } else { - DCHECK(dst_loc.IsFpuRegister()); - FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>(); - __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker); - } - - // Memory barriers, in the case of references, are handled in the - // previous switch statement. 
- if (is_volatile && (type != DataType::Type::kReference)) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } -} - -void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, - const FieldInfo& field_info ATTRIBUTE_UNUSED) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); - } else { - locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); - } -} - -void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, - const FieldInfo& field_info, - bool value_can_be_null) { - DataType::Type type = field_info.GetFieldType(); - LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); - Location value_location = locations->InAt(1); - StoreOperandType store_type = kStoreByte; - bool is_volatile = field_info.IsVolatile(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); - auto null_checker = GetImplicitNullChecker(instruction, codegen_); - - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - store_type = kStoreByte; - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - store_type = kStoreHalfword; - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - case DataType::Type::kReference: - store_type = kStoreWord; - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - store_type = kStoreDoubleword; - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); - } - - if (value_location.IsConstant()) { - int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); - } else { - if (!DataType::IsFloatingPointType(type)) { - DCHECK(value_location.IsRegister()); - GpuRegister src = value_location.AsRegister<GpuRegister>(); - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. 
- DCHECK_EQ(type, DataType::Type::kReference); - __ PoisonHeapReference(TMP, src); - __ StoreToOffset(store_type, TMP, obj, offset, null_checker); - } else { - __ StoreToOffset(store_type, src, obj, offset, null_checker); - } - } else { - DCHECK(value_location.IsFpuRegister()); - FpuRegister src = value_location.AsFpuRegister<FpuRegister>(); - __ StoreFpuToOffset(store_type, src, obj, offset, null_checker); - } - } - - if (needs_write_barrier) { - DCHECK(value_location.IsRegister()); - GpuRegister src = value_location.AsRegister<GpuRegister>(); - codegen_->MarkGCCard(obj, src, value_can_be_null); - } - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); - } -} - -void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void LocationsBuilderMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); -} - -void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( - HInstruction* instruction, - Location out, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option) { - GpuRegister out_reg = out.AsRegister<GpuRegister>(); - if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); - if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; - } - if (kUseBakerReadBarrier) { - // Load with fast path based Baker's read barrier. - // /* HeapReference<Object> */ out = *(out + offset) - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out, - out_reg, - offset, - maybe_temp, - /* needs_null_check= */ false); - } else { - // Load with slow path based read barrier. - // Save the value of `out` into `maybe_temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg); - // /* HeapReference<Object> */ out = *(out + offset) - __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset); - codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); - } - } else { - // Plain load with no read barrier. - // /* HeapReference<Object> */ out = *(out + offset) - __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset); - __ MaybeUnpoisonHeapReference(out_reg); - } -} - -void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( - HInstruction* instruction, - Location out, - Location obj, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option) { - GpuRegister out_reg = out.AsRegister<GpuRegister>(); - GpuRegister obj_reg = obj.AsRegister<GpuRegister>(); - if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); - if (kUseBakerReadBarrier) { - if (!kBakerReadBarrierThunksEnableForFields) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; - } - // Load with fast path based Baker's read barrier. 
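Editor's note: the field accessors above bracket volatile accesses with barriers: kAnyStore before and kAnyAny after a volatile store, kLoadAny after a volatile load. Roughly the same ordering can be expressed with C++ fences; the mapping to std::memory_order below is an approximation chosen for the illustration, not how ART defines these barrier kinds:

    #include <atomic>
    #include <cstdint>

    // A rough model of the barrier placement above for a volatile int field.
    struct FieldModel {
      std::atomic<int32_t> storage{0};

      void VolatileStore(int32_t value) {
        std::atomic_thread_fence(std::memory_order_release);   // kAnyStore before the store.
        storage.store(value, std::memory_order_relaxed);       // The plain store above.
        std::atomic_thread_fence(std::memory_order_seq_cst);   // kAnyAny after the store.
      }

      int32_t VolatileLoad() {
        int32_t value = storage.load(std::memory_order_relaxed);  // The plain load above.
        std::atomic_thread_fence(std::memory_order_acquire);      // kLoadAny after the load.
        return value;
      }
    };

    int main() {
      FieldModel f;
      f.VolatileStore(42);
      return f.VolatileLoad() == 42 ? 0 : 1;
    }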
- // /* HeapReference<Object> */ out = *(obj + offset) - codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, - out, - obj_reg, - offset, - maybe_temp, - /* needs_null_check= */ false); - } else { - // Load with slow path based read barrier. - // /* HeapReference<Object> */ out = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset); - codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); - } - } else { - // Plain load with no read barrier. - // /* HeapReference<Object> */ out = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset); - __ MaybeUnpoisonHeapReference(out_reg); - } -} - -static inline int GetBakerMarkThunkNumber(GpuRegister reg) { - static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20, "Expecting equal"); - if (reg >= V0 && reg <= T2) { // 13 consequtive regs. - return reg - V0; - } else if (reg >= S2 && reg <= S7) { // 6 consequtive regs. - return 13 + (reg - S2); - } else if (reg == S8) { // One more. - return 19; - } - LOG(FATAL) << "Unexpected register " << reg; - UNREACHABLE(); -} - -static inline int GetBakerMarkFieldArrayThunkDisplacement(GpuRegister reg, bool short_offset) { - int num = GetBakerMarkThunkNumber(reg) + - (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); - return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; -} - -static inline int GetBakerMarkGcRootThunkDisplacement(GpuRegister reg) { - return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + - BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; -} - -void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - GpuRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - Mips64Label* label_low) { - if (label_low != nullptr) { - DCHECK_EQ(offset, 0x5678u); - } - GpuRegister root_reg = root.AsRegister<GpuRegister>(); - if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); - if (kUseBakerReadBarrier) { - // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used: - if (kBakerReadBarrierThunksEnableForGcRoots) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { - // temp = &gc_root_thunk<root_reg> - // root = temp(root) - // } - - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); - const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); - int16_t offset_low = Low16Bits(offset); - int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign - // extension in lwu. - bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); - GpuRegister base = short_offset ? obj : TMP; - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. 
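Editor's note: GetBakerMarkThunkNumber above maps the 20 markable registers onto consecutive thunk slots (V0..T2, then S2..S7, then S8), and the displacement helpers scale that index by a per-entry stride. A small standalone model of the mapping; the register values mirror the MIPS64 GPR numbering and the stride is a placeholder chosen only for the sketch:

    #include <cassert>

    // Hypothetical mirror of the GPR indices used above.
    enum GpuRegisterModel { V0 = 2, T2 = 14, S2 = 18, S7 = 23, S8 = 30 };

    constexpr int kRegisterCount = 20;       // BAKER_MARK_INTROSPECTION_REGISTER_COUNT
    constexpr int kFieldArrayEntrySize = 8;  // Placeholder stride, assumed for illustration.

    int ThunkNumber(int reg) {
      if (reg >= V0 && reg <= T2) return reg - V0;         // 13 consecutive registers.
      if (reg >= S2 && reg <= S7) return 13 + (reg - S2);  // 6 more.
      if (reg == S8) return 19;                            // The last slot.
      return -1;                                           // Not a markable register.
    }

    int FieldArrayThunkDisplacement(int reg, bool short_offset) {
      int num = ThunkNumber(reg) + (short_offset ? kRegisterCount : 0);
      return num * kFieldArrayEntrySize;
    }

    int main() {
      assert(ThunkNumber(V0) == 0);
      assert(ThunkNumber(T2) == 12);
      assert(ThunkNumber(S2) == 13);
      assert(ThunkNumber(S8) == 19);
      assert(FieldArrayThunkDisplacement(V0, /*short_offset=*/true) == 20 * kFieldArrayEntrySize);
      return 0;
    }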
- __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); - if (!short_offset) { - DCHECK(!label_low); - __ Daui(base, obj, offset_high); - } - Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare= */ true); - if (label_low != nullptr) { - DCHECK(short_offset); - __ Bind(label_low); - } - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction - // in delay slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - } else { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` (T9) the read barrier mark entry point corresponding - // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` - // is false, and vice versa. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - if (label_low != nullptr) { - __ Bind(label_low); - } - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); - - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); - __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } - } else { - if (label_low != nullptr) { - __ Bind(label_low); - } - // GC root loaded through a slow path for read barriers other - // than Baker's. - // /* GcRoot<mirror::Object>* */ root = obj + offset - __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset)); - // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); - } - } else { - if (label_low != nullptr) { - __ Bind(label_low); - } - // Plain GC root load with no read barrier. - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); - // Note that GC roots are not affected by heap poisoning, thus we - // do not have to unpoison `root_reg` here. 
- } -} - -void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t offset, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - if (kBakerReadBarrierThunksEnableForFields) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. If the holder is not gray, - // it issues a load-load memory barrier and returns to the original - // reference load. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. - // if (temp != nullptr) { - // temp = &field_array_thunk<holder_reg> - // temp() - // } - // not_gray_return_address: - // // If the offset is too large to fit into the lw instruction, we - // // use an adjusted base register (TMP) here. This register - // // receives bits 16 ... 31 of the offset before the thunk invocation - // // and the thunk benefits from it. - // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. - // gray_return_address: - - DCHECK(temp.IsInvalid()); - bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); - // There may have or may have not been a null check if the field offset is smaller than - // the page size. - // There must've been a null check in case it's actually a load from an array. - // We will, however, perform an explicit null check in the thunk as it's easier to - // do it than not. - if (instruction->IsArrayGet()) { - DCHECK(!needs_null_check); - } - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); - GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); - Mips64Label skip_call; - if (short_offset) { - __ Beqzc(T9, &skip_call, /* is_bare= */ true); - __ Nop(); // In forbidden slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - // /* HeapReference<Object> */ ref = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction. - } else { - int16_t offset_low = Low16Bits(offset); - int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. - __ Beqz(T9, &skip_call, /* is_bare= */ true); - __ Daui(TMP, obj, offset_high); // In delay slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - // /* HeapReference<Object> */ ref = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction. 
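Editor's note: both the GC-root and field thunk paths above split a 32-bit offset into a daui-compatible high half and a 16-bit low half; because the load instruction sign-extends its immediate, the high half is computed from offset minus the sign-extended low half. A quick model of that split and its reconstruction (hypothetical helpers mirroring Low16Bits/High16Bits, for illustration):

    #include <cassert>
    #include <cstdint>

    int16_t Low16Bits(uint32_t value) { return static_cast<int16_t>(value & 0xFFFFu); }
    int16_t High16Bits(uint32_t value) { return static_cast<int16_t>(value >> 16); }

    // Recombine the way the hardware does: the high half is added shifted by 16 (daui),
    // the low half is sign-extended by the load's 16-bit immediate.
    uint32_t Recombine(int16_t high, int16_t low) {
      return (static_cast<uint32_t>(static_cast<uint16_t>(high)) << 16) +
             static_cast<uint32_t>(static_cast<int32_t>(low));
    }

    int main() {
      for (uint32_t offset : {0x12u, 0x7FFCu, 0x8000u, 0x12348FF0u, 0xFFFF8000u}) {
        int16_t low = Low16Bits(offset);
        // Subtracting the sign-extended low half first compensates for the
        // sign extension the load instruction will apply.
        int16_t high = High16Bits(offset - static_cast<uint32_t>(static_cast<int32_t>(low)));
        assert(Recombine(high, low) == offset);
      }
      return 0;
    }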
- } - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - __ MaybeUnpoisonHeapReference(ref_reg); - return; - } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Location no_index = Location::NoLocation(); - ScaleFactor no_scale_factor = TIMES_1; - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - offset, - no_index, - no_scale_factor, - temp, - needs_null_check); -} - -void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t data_offset, - Location index, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - ScaleFactor scale_factor = TIMES_4; - - if (kBakerReadBarrierThunksEnableForArrays) { - // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded reference or not. Instead, we - // load into `temp` (T9) the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. - // - // We use thunks for the slow path. That thunk checks the reference - // and jumps to the entrypoint if needed. If the holder is not gray, - // it issues a load-load memory barrier and returns to the original - // reference load. - // - // temp = Thread::Current()->pReadBarrierMarkReg00 - // // AKA &art_quick_read_barrier_mark_introspection. - // if (temp != nullptr) { - // temp = &field_array_thunk<holder_reg> - // temp() - // } - // not_gray_return_address: - // // The element address is pre-calculated in the TMP register before the - // // thunk invocation and the thunk benefits from it. - // HeapReference<mirror::Object> reference = data[index]; // Original reference load. - // gray_return_address: - - DCHECK(temp.IsInvalid()); - DCHECK(index.IsValid()); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); - // We will not do the explicit null check in the thunk as some form of a null check - // must've been done earlier. - DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); - Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare= */ true); - GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); - GpuRegister index_reg = index.AsRegister<GpuRegister>(); - __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. - __ Jialc(T9, thunk_disp); - __ Bind(&skip_call); - // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction. 
- __ MaybeUnpoisonHeapReference(ref_reg); - return; - } - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - data_offset, - index, - scale_factor, - temp, - needs_null_check); -} - -void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check, - bool always_update_field) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - // In slow path based read barriers, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); - GpuRegister temp_reg = temp.AsRegister<GpuRegister>(); - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - - // /* int32_t */ monitor = obj->monitor_ - __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - __ Sync(0); // Barrier to prevent load-load reordering. - - // The actual reference load. - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - size_t computed_offset = - (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset); - } else { - GpuRegister index_reg = index.AsRegister<GpuRegister>(); - if (scale_factor == TIMES_1) { - __ Daddu(TMP, index_reg, obj); - } else { - __ Dlsa(TMP, index_reg, obj, scale_factor); - } - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset); - } - } else { - // /* HeapReference<Object> */ ref = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); - } - - // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(ref_reg); - - // Slow path marking the object `ref` when it is gray. - SlowPathCodeMIPS64* slow_path; - if (always_update_field) { - // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports address - // of the form `obj + field_offset`, where `obj` is a register and - // `field_offset` is a register. Thus `offset` and `scale_factor` - // above are expected to be null in this code path. 
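The fast-path pseudocode quoted in the comment above maps onto a small C++ sketch; the lock-word layout, state-bit position, and MarkSlowPath hook are simplified assumptions, not the real ART types:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kGrayState = 1u;
    constexpr uint32_t kRbStateShift = 28;   // Assumed position of the 1-bit state.

    struct Obj { std::atomic<uint32_t> monitor; };

    // Hypothetical stand-in for the runtime marking entry point; the real slow
    // path forwards the object, here it just returns its argument.
    inline Obj* MarkSlowPath(Obj* ref) { return ref; }

    inline Obj* BakerFieldLoad(Obj* holder, Obj** field_addr) {
      // 1. Load the lock word *before* the reference (load-load ordering matters,
      //    hence the Sync(0) in the generated code; acquire models it here).
      uint32_t rb_state =
          (holder->monitor.load(std::memory_order_acquire) >> kRbStateShift) & 1u;
      // 2. Original reference load.
      Obj* ref = *field_addr;
      // 3. If the holder is gray, the reference may point to from-space: mark it.
      if (rb_state == kGrayState) {
        ref = MarkSlowPath(ref);
      }
      return ref;
    }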
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetScopedAllocator()) - ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction, - ref, - obj, - /* field_offset= */ index, - temp_reg); - } else { - slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); - } - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit into the sign bit (31) and - // performing a branch on less than zero. - static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); - __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); - __ Bltzc(temp_reg, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { - DCHECK(kEmitCompilerReadBarrier); - - // Insert a slow path based read barrier *after* the reference load. - // - // If heap poisoning is enabled, the unpoisoning of the loaded - // reference will be carried out by the runtime within the slow - // path. - // - // Note that `ref` currently does not get unpoisoned (when heap - // poisoning is enabled), which is alright as the `ref` argument is - // not used by the artReadBarrierSlow entry point. - // - // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCodeMIPS64* slow_path = new (GetScopedAllocator()) - ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index); - AddSlowPath(slow_path); - - __ Bc(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { - if (kEmitCompilerReadBarrier) { - // Baker's read barriers shall be handled by the fast path - // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier). - DCHECK(!kUseBakerReadBarrier); - // If heap poisoning is enabled, unpoisoning will be taken care of - // by the runtime within the slow path. - GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); - } else if (kPoisonHeapReferences) { - __ UnpoisonHeapReference(out.AsRegister<GpuRegister>()); - } -} - -void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction, - Location out, - Location root) { - DCHECK(kEmitCompilerReadBarrier); - - // Insert a slow path based read barrier *after* the GC root load. - // - // Note that GC roots are not affected by heap poisoning, so we do - // not need to do anything special for this here. 
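The Sll/Bltzc pair near the start of this hunk tests the read-barrier state by shifting it into the sign bit and branching on "less than zero"; the following portable C++ check (state-bit position assumed) confirms the equivalence with a plain mask-and-test:

    #include <cassert>
    #include <cstdint>

    constexpr int kRbStateShift = 28;   // Assumed bit position of the 1-bit state.

    bool IsGrayByMask(uint32_t lock_word) {
      return ((lock_word >> kRbStateShift) & 1u) != 0u;
    }

    bool IsGrayBySignBit(uint32_t lock_word) {
      // Shift the state bit into bit 31 and test "< 0", which is what the
      // SLL + BLTZC sequence does without needing a separate AND.
      int32_t shifted = static_cast<int32_t>(lock_word << (31 - kRbStateShift));
      return shifted < 0;
    }

    int main() {
      for (uint64_t w = 0; w <= 0xffffffffull; w += 0x01010101ull) {
        assert(IsGrayByMask(static_cast<uint32_t>(w)) ==
               IsGrayBySignBit(static_cast<uint32_t>(w)));
      }
      return 0;
    }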
- SlowPathCodeMIPS64* slow_path = - new (GetScopedAllocator()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root); - AddSlowPath(slow_path); - - __ Bc(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - bool baker_read_barrier_slow_path = false; - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); - call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; - break; - } - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - case TypeCheckKind::kBitstringCheck: - break; - } - - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); - if (baker_read_barrier_slow_path) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } - // The output does overlap inputs. - // Note that TypeCheckSlowPathMIPS64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); -} - -void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary* locations = instruction->GetLocations(); - Location obj_loc = locations->InAt(0); - GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location cls = locations->InAt(1); - Location out_loc = locations->Out(); - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); - DCHECK_LE(num_temps, 1u); - Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - Mips64Label done; - SlowPathCodeMIPS64* slow_path = nullptr; - - // Return 0 if `obj` is null. - // Avoid this check if we know `obj` is not null. 
- if (instruction->MustDoNullCheck()) { - __ Move(out, ZERO); - __ Beqzc(obj, &done); - } - - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls.AsRegister<GpuRegister>()); - __ Sltiu(out, out, 1); - break; - } - - case TypeCheckKind::kAbstractClassCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // If the class is abstract, we eagerly fetch the super class of the - // object to avoid doing a comparison we know will fail. - Mips64Label loop; - __ Bind(&loop); - // /* HeapReference<Class> */ out = out->super_class_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - super_offset, - maybe_temp_loc, - read_barrier_option); - // If `out` is null, we use it for the result, and jump to `done`. - __ Beqzc(out, &done); - __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kClassHierarchyCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Walk over the class hierarchy to find a match. - Mips64Label loop, success; - __ Bind(&loop); - __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); - // /* HeapReference<Class> */ out = out->super_class_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - super_offset, - maybe_temp_loc, - read_barrier_option); - __ Bnezc(out, &loop); - // If `out` is null, we use it for the result, and jump to `done`. - __ Bc(&done); - __ Bind(&success); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kArrayObjectCheck: { - ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); - // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - read_barrier_option); - // Do an exact check. - Mips64Label success; - __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); - // Otherwise, we need to check that the object's class is a non-primitive array. - // /* HeapReference<Class> */ out = out->component_type_ - GenerateReferenceLoadOneRegister(instruction, - out_loc, - component_offset, - maybe_temp_loc, - read_barrier_option); - // If `out` is null, we use it for the result, and jump to `done`. - __ Beqzc(out, &done); - __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Sltiu(out, out, 1); - __ Bc(&done); - __ Bind(&success); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kArrayCheck: { - // No read barrier since the slow path will retry upon failure. 
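The kAbstractClassCheck and kClassHierarchyCheck paths above both reduce to a walk up the super_class_ chain; a compact sketch of that loop with a simplified class model (not ART's mirror types) reads:

    struct Class {
      const Class* super_class;      // Stand-in for mirror::Class::super_class_.
    };

    // Returns true iff `klass` equals `target` or has `target` somewhere above it,
    // mirroring the loop with Beqc/Bnezc exits in the deleted code.
    bool IsSubclassOf(const Class* klass, const Class* target) {
      for (const Class* k = klass; k != nullptr; k = k->super_class) {
        if (k == target) {
          return true;               // Matches the branch to `success`.
        }
      }
      return false;                  // Null super class reached: instanceof is false.
    }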
- // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal= */ false); - codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - __ LoadConst32(out, 1); - break; - } - - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: { - // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved and interface check - // cases. - // - // We cannot directly call the InstanceofNonTrivial runtime - // entry point without resorting to a type checking slow path - // here (i.e. by calling InvokeRuntime directly), as it would - // require to assign fixed registers for the inputs of this - // HInstanceOf instruction (following the runtime calling - // convention), which might be cluttered by the potential first - // read barrier emission at the beginning of this method. - // - // TODO: Introduce a new runtime entry point taking the object - // to test (instead of its class) as argument, and let it deal - // with the read barrier issues. This will let us refactor this - // case of the `switch` code as it was previously (with a direct - // call to the runtime not using a type checking slow path). - // This should also be beneficial for the other cases above. - DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal= */ false); - codegen_->AddSlowPath(slow_path); - __ Bc(slow_path->GetEntryLabel()); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ Sltiu(out, out, 1); - break; - } - } - - __ Bind(&done); - - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } -} - -void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS64::VisitNullConstant(HNullConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { - // The trampoline uses the same calling convention as dex calling conventions, - // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain - // the method_idx. 
- HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { - codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); -} - -void LocationsBuilderMIPS64::HandleInvoke(HInvoke* invoke) { - InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor; - CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); -} - -void LocationsBuilderMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { - HandleInvoke(invoke); - // The register T0 is required to be used for the hidden argument in - // art_quick_imt_conflict_trampoline, so add the hidden argument. - invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0)); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { - // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); - Location receiver = invoke->GetLocations()->InAt(0); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize); - - // Set the hidden argument. - __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(), - invoke->GetDexMethodIndex()); - - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ LoadFromOffset(kLoadUnsignedWord, temp, SP, receiver.GetStackIndex()); - __ LoadFromOffset(kLoadUnsignedWord, temp, temp, class_offset); - } else { - __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); - } - codegen_->MaybeRecordImplicitNullCheck(invoke); - // Instead of simply (possibly) unpoisoning `temp` here, we should - // emit a read barrier for the previous class reference load. - // However this is not required in practice, as this is an - // intermediate/temporary reference and because the current - // concurrent copying collector keeps the from-space memory - // intact/accessible until the end of the marking phase (the - // concurrent copying collector may not in the future). - __ MaybeUnpoisonHeapReference(temp); - __ LoadFromOffset(kLoadDoubleword, temp, temp, - mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value()); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kMips64PointerSize)); - // temp = temp->GetImtEntryAt(method_offset); - __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); - // T9 = temp->GetEntryPoint(); - __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); - // T9(); - __ Jalr(T9); - __ Nop(); - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); -} - -void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); - if (intrinsic.TryDispatch(invoke)) { - return; - } - - HandleInvoke(invoke); -} - -void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been pruned by - // art::PrepareForRegisterAllocation. 
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - - IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); - if (intrinsic.TryDispatch(invoke)) { - return; - } - - HandleInvoke(invoke); -} - -void LocationsBuilderMIPS64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { - HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { - codegen_->GenerateInvokePolymorphicCall(invoke); -} - -void LocationsBuilderMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { - HandleInvoke(invoke); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { - codegen_->GenerateInvokeCustomCall(invoke); -} - -static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { - if (invoke->GetLocations()->Intrinsified()) { - IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); - intrinsic.Dispatch(invoke); - return true; - } - return false; -} - -HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) { - bool fallback_load = false; - switch (desired_string_load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageRelRo: - case HLoadString::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); - break; - case HLoadString::LoadKind::kJitBootImageAddress: - case HLoadString::LoadKind::kJitTableAddress: - DCHECK(Runtime::Current()->UseJitCompilation()); - break; - case HLoadString::LoadKind::kRuntimeCall: - break; - } - if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; - } - return desired_string_load_kind; -} - -HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) { - bool fallback_load = false; - switch (desired_class_load_kind) { - case HLoadClass::LoadKind::kInvalid: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - case HLoadClass::LoadKind::kReferrersClass: - break; - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageRelRo: - case HLoadClass::LoadKind::kBssEntry: - DCHECK(!Runtime::Current()->UseJitCompilation()); - break; - case HLoadClass::LoadKind::kJitBootImageAddress: - case HLoadClass::LoadKind::kJitTableAddress: - DCHECK(Runtime::Current()->UseJitCompilation()); - break; - case HLoadClass::LoadKind::kRuntimeCall: - break; - } - if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; - } - return desired_class_load_kind; -} - -HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { - // On MIPS64 we support all dispatch types. - return desired_dispatch_info; -} - -void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { - // All registers are assumed to be correctly set up per the calling convention. - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
- HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); - - switch (method_load_kind) { - case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { - // temp = thread->string_init_entrypoint - uint32_t offset = - GetThreadOffset<kMips64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); - __ LoadFromOffset(kLoadDoubleword, - temp.AsRegister<GpuRegister>(), - TR, - offset); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - break; - case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { - DCHECK(GetCompilerOptions().IsBootImage()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - NewBootImageMethodPatch(invoke->GetTargetMethod()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { - uint32_t boot_image_offset = GetBootImageOffset(invoke); - PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); - PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. - __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); - PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); - EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Ld(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); - break; - } - case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: - __ LoadLiteral(temp.AsRegister<GpuRegister>(), - kLoadDoubleword, - DeduplicateUint64Literal(invoke->GetMethodAddress())); - break; - case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { - GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); - return; // No code pointer retrieval; the runtime performs the call directly. - } - } - - switch (code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Balc(&frame_entry_label_); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: - // T9 = callee_method->entry_point_from_quick_compiled_code_; - __ LoadFromOffset(kLoadDoubleword, - T9, - callee_method.AsRegister<GpuRegister>(), - ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kMips64PointerSize).Int32Value()); - // T9() - __ Jalr(T9); - __ Nop(); - break; - } - RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); - - DCHECK(!IsLeafMethod()); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been pruned by - // art::PrepareForRegisterAllocation. 
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - - LocationSummary* locations = invoke->GetLocations(); - codegen_->GenerateStaticOrDirectCall(invoke, - locations->HasTemps() - ? locations->GetTemp(0) - : Location::NoLocation()); -} - -void CodeGeneratorMIPS64::GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { - // Use the calling convention instead of the location of the receiver, as - // intrinsics may have put the receiver in a different register. In the intrinsics - // slow path, the arguments have been moved to the right place, so here we are - // guaranteed that the receiver is the first register of the calling convention. - InvokeDexCallingConvention calling_convention; - GpuRegister receiver = calling_convention.GetRegisterAt(0); - - GpuRegister temp = temp_location.AsRegister<GpuRegister>(); - size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( - invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize); - - // temp = object->GetClass(); - __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset); - MaybeRecordImplicitNullCheck(invoke); - // Instead of simply (possibly) unpoisoning `temp` here, we should - // emit a read barrier for the previous class reference load. - // However this is not required in practice, as this is an - // intermediate/temporary reference and because the current - // concurrent copying collector keeps the from-space memory - // intact/accessible until the end of the marking phase (the - // concurrent copying collector may not in the future). - __ MaybeUnpoisonHeapReference(temp); - // temp = temp->GetMethodAt(method_offset); - __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); - // T9 = temp->GetEntryPoint(); - __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); - // T9(); - __ Jalr(T9); - __ Nop(); - RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); -} - -void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); -} - -void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { - HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); - return; - } - DCHECK(!cls->NeedsAccessCheck()); - - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); - if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
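The virtual-call sequence in this hunk is the usual embedded-vtable dispatch: load the receiver's class, index the vtable, load the entry point, call through T9. A hedged C++ model of the same shape, with an illustrative object layout rather than ART's, is:

    #include <cstddef>

    struct ArtMethodStub {
      void (*entry_point)();          // Stand-in for entry_point_from_quick_compiled_code_.
    };

    struct ClassStub {
      ArtMethodStub* vtable[8];       // Embedded vtable; fixed size just for the sketch.
    };

    struct ObjectStub {
      ClassStub* klass;               // Stand-in for mirror::Object::klass_.
    };

    inline void InvokeVirtualSlot(ObjectStub* receiver, size_t vtable_index) {
      ClassStub* klass = receiver->klass;                    // temp = object->GetClass()
      ArtMethodStub* method = klass->vtable[vtable_index];   // temp = temp->GetMethodAt(...)
      method->entry_point();                                 // T9 = entry point; T9()
    }

    int main() {
      ArtMethodStub m{+[]() {}};      // A do-nothing entry point.
      ClassStub cls{};
      cls.vtable[3] = &m;
      ObjectStub obj{&cls};
      InvokeVirtualSlot(&obj, 3);     // Dispatches through the embedded vtable.
      return 0;
    }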
- } - if (load_kind == HLoadClass::LoadKind::kReferrersClass) { - locations->SetInAt(0, Location::RequiresRegister()); - } - locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { - // Rely on the type resolution or initialization and marking to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barriers we have a temp-clobbering call. - } - } -} - -// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not -// move. -void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { - HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { - codegen_->GenerateLoadClassRuntimeCall(cls); - return; - } - DCHECK(!cls->NeedsAccessCheck()); - - LocationSummary* locations = cls->GetLocations(); - Location out_loc = locations->Out(); - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - GpuRegister current_method_reg = ZERO; - if (load_kind == HLoadClass::LoadKind::kReferrersClass || - load_kind == HLoadClass::LoadKind::kRuntimeCall) { - current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); - } - - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : kCompilerReadBarrierOption; - bool generate_null_check = false; - switch (load_kind) { - case HLoadClass::LoadKind::kReferrersClass: - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad(cls, - out_loc, - current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); - break; - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* imm16= */ 0x5678); - break; - } - case HLoadClass::LoadKind::kBootImageRelRo: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageRelRoPatch(boot_image_offset); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* imm16= */ 0x5678); - break; - } - case HLoadClass::LoadKind::kBssEntry: { - CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high = - codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, out); - GenerateGcRootFieldLoad(cls, - out_loc, - out, - /* offset= */ 0x5678, - read_barrier_option, - &info_low->label); - generate_null_check = true; - break; - } - case 
HLoadClass::LoadKind::kJitBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); - break; - } - case HLoadClass::LoadKind::kJitTableAddress: - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), - cls->GetTypeIndex(), - cls->GetClass())); - GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option); - break; - case HLoadClass::LoadKind::kRuntimeCall: - case HLoadClass::LoadKind::kInvalid: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } - - if (generate_null_check || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(cls, cls); - codegen_->AddSlowPath(slow_path); - if (generate_null_check) { - __ Beqzc(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } - } -} - -void LocationsBuilderMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); -} - -void InstructionCodeGeneratorMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { - codegen_->GenerateLoadMethodHandleRuntimeCall(load); -} - -void LocationsBuilderMIPS64::VisitLoadMethodType(HLoadMethodType* load) { - InvokeRuntimeCallingConvention calling_convention; - Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); - CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); -} - -void InstructionCodeGeneratorMIPS64::VisitLoadMethodType(HLoadMethodType* load) { - codegen_->GenerateLoadMethodTypeRuntimeCall(load); -} - -static int32_t GetExceptionTlsOffset() { - return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value(); -} - -void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS64::VisitLoadException(HLoadException* load) { - GpuRegister out = load->GetLocations()->Out().AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadUnsignedWord, out, TR, GetExceptionTlsOffset()); -} - -void LocationsBuilderMIPS64::VisitClearException(HClearException* clear) { - new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); -} - -void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { - __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset()); -} - -void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { - HLoadString::LoadKind load_kind = load->GetLoadKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); - if (load_kind == HLoadString::LoadKind::kRuntimeCall) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } else { - 
locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { - // Rely on the pResolveString and marking to save everything we need. - locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barriers we have a temp-clobbering call. - } - } - } -} - -// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not -// move. -void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { - HLoadString::LoadKind load_kind = load->GetLoadKind(); - LocationSummary* locations = load->GetLocations(); - Location out_loc = locations->Out(); - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - - switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* imm16= */ 0x5678); - return; - } - case HLoadString::LoadKind::kBootImageRelRo: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageRelRoPatch(boot_image_offset); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* imm16= */ 0x5678); - return; - } - case HLoadString::LoadKind::kBssEntry: { - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* offset= */ 0x5678, - kCompilerReadBarrierOption, - &info_low->label); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - return; - } - case HLoadString::LoadKind::kJitBootImageAddress: { - uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); - return; - } - case HLoadString::LoadKind::kJitTableAddress: - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), - load->GetStringIndex(), - load->GetString())); - GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption); - return; - default: - break; - } - - // TODO: Re-add the compiler code to do string dex cache lookup again. 
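The kBssEntry cases above implement a lazily filled cache slot: load the GC root from .bss and fall back to a runtime call when it is still null, after which the slot is populated for later loads. A self-contained sketch of that pattern follows; the resolver below is a hypothetical stand-in for the pResolveString path:

    #include <atomic>
    #include <cassert>

    struct StringRef { int dummy; };                  // Opaque stand-in for mirror::String.

    static StringRef g_interned{42};                  // Pretend-resolved string.

    // Hypothetical slow path: resolve the string and publish it into the .bss slot
    // so later loads take the fast path.
    StringRef* ResolveStringRuntime(std::atomic<StringRef*>* bss_slot) {
      bss_slot->store(&g_interned, std::memory_order_release);
      return &g_interned;
    }

    StringRef* LoadStringBssEntry(std::atomic<StringRef*>* bss_slot) {
      StringRef* str = bss_slot->load(std::memory_order_acquire);  // GC-root style load.
      if (str == nullptr) {                 // Beqzc(out, slow_path->GetEntryLabel())
        str = ResolveStringRuntime(bss_slot);
      }
      return str;
    }

    int main() {
      std::atomic<StringRef*> slot{nullptr};
      assert(LoadStringBssEntry(&slot) == &g_interned);   // First load: slow path.
      assert(LoadStringBssEntry(&slot) == &g_interned);   // Second load: fast path.
      return 0;
    }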
- DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(calling_convention.GetRegisterAt(0), out); - __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); - codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); -} - -void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); - locations->SetOut(Location::ConstantLocation(constant)); -} - -void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { - // Will be generated at use site. -} - -void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { - codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, - instruction, - instruction->GetDexPc()); - if (instruction->IsEnter()) { - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); - } else { - CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); - } -} - -void LocationsBuilderMIPS64::VisitMul(HMul* mul) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); - switch (mul->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitMul(HMul* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == DataType::Type::kInt32) - __ MulR6(dst, lhs, rhs); - else - __ Dmul(dst, lhs, rhs); - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) - __ MulS(dst, lhs, rhs); - else - __ MulD(dst, lhs, rhs); - break; - } - default: - LOG(FATAL) << "Unexpected mul type " << type; - } -} - -void LocationsBuilderMIPS64::VisitNeg(HNeg* neg) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); - 
switch (neg->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - if (type == DataType::Type::kInt32) - __ Subu(dst, ZERO, src); - else - __ Dsubu(dst, ZERO, src); - break; - } - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - if (type == DataType::Type::kFloat32) - __ NegS(dst, src); - else - __ NegD(dst, src); - break; - } - default: - LOG(FATAL) << "Unexpected neg type " << type; - } -} - -void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); -} - -void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
- QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); - codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); - DCHECK(!codegen_->IsLeafMethod()); -} - -void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); -} - -void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); -} - -void LocationsBuilderMIPS64::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS64::VisitNot(HNot* instruction) { - DataType::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - __ Nor(dst, src, ZERO); - break; - } - - default: - LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); - } -} - -void LocationsBuilderMIPS64::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorMIPS64::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = instruction->GetLocations(); - __ Xori(locations->Out().AsRegister<GpuRegister>(), - locations->InAt(0).AsRegister<GpuRegister>(), - 1); -} - -void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) { - LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); - locations->SetInAt(0, Location::RequiresRegister()); -} - -void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) { - if (CanMoveNullCheckToUser(instruction)) { - return; - } - Location obj = instruction->GetLocations()->InAt(0); - - __ Lw(ZERO, obj.AsRegister<GpuRegister>(), 0); - RecordPcInfo(instruction, instruction->GetDexPc()); -} - -void CodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCodeMIPS64* slow_path = - new (GetScopedAllocator()) NullCheckSlowPathMIPS64(instruction); - AddSlowPath(slow_path); - - Location obj = instruction->GetLocations()->InAt(0); - - __ Beqzc(obj.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); -} - -void InstructionCodeGeneratorMIPS64::VisitNullCheck(HNullCheck* instruction) { - codegen_->GenerateNullCheck(instruction); -} - -void LocationsBuilderMIPS64::VisitOr(HOr* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitOr(HOr* instruction) { - 
HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS64::VisitParallelMove(HParallelMove* instruction) { - if (instruction->GetNext()->IsSuspendCheck() && - instruction->GetBlock()->GetLoopInformation() != nullptr) { - HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); - // The back edge will generate the suspend check. - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); - } - - codegen_->GetMoveResolver()->EmitNativeCode(instruction); -} - -void LocationsBuilderMIPS64::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); - if (location.IsStackSlot()) { - location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } else if (location.IsDoubleStackSlot()) { - location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } - locations->SetOut(location); -} - -void InstructionCodeGeneratorMIPS64::VisitParameterValue(HParameterValue* instruction - ATTRIBUTE_UNUSED) { - // Nothing to do, the parameter is already at its location. -} - -void LocationsBuilderMIPS64::VisitCurrentMethod(HCurrentMethod* instruction) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); -} - -void InstructionCodeGeneratorMIPS64::VisitCurrentMethod(HCurrentMethod* instruction - ATTRIBUTE_UNUSED) { - // Nothing to do, the method is already at its location. -} - -void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - locations->SetInAt(i, Location::Any()); - } - locations->SetOut(Location::Any()); -} - -void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void LocationsBuilderMIPS64::VisitRem(HRem* rem) { - DataType::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = - DataType::IsFloatingPointType(type) ? 
LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(type)); - break; - } - - default: - LOG(FATAL) << "Unexpected rem type " << type; - } -} - -void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { - DataType::Type type = instruction->GetType(); - - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - GenerateDivRemIntegral(instruction); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: { - QuickEntrypointEnum entrypoint = - (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod; - codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); - if (type == DataType::Type::kFloat32) { - CheckEntrypointTypes<kQuickFmodf, float, float, float>(); - } else { - CheckEntrypointTypes<kQuickFmod, double, double, double>(); - } - break; - } - default: - LOG(FATAL) << "Unexpected rem type " << type; - } -} - -static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { - LocationSummary* locations = new (allocator) LocationSummary(minmax); - switch (minmax->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. 
After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { - DataType::Type type = minmax->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - GenerateMinMaxInt(minmax->GetLocations(), is_min); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateMinMaxFP(minmax->GetLocations(), is_min, type); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << type; - } -} - -void LocationsBuilderMIPS64::VisitMin(HMin* min) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), min); -} - -void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { - GenerateMinMax(min, /*is_min*/ true); -} - -void LocationsBuilderMIPS64::VisitMax(HMax* max) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), max); -} - -void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { - GenerateMinMax(max, /*is_min*/ false); -} - -void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); - switch (abs->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { - LocationSummary* locations = abs->GetLocations(); - switch (abs->GetResultType()) { - case DataType::Type::kInt32: { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - break; - } - case DataType::Type::kInt64: { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - break; - } - case DataType::Type::kFloat32: { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - __ AbsS(out, in); - break; - } - case DataType::Type::kFloat64: { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - __ AbsD(out, in); - break; - } - default: - LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); - } -} - -void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { - constructor_fence->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { - GenerateMemoryBarrier(MemBarrierKind::kStoreStore); -} - -void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - memory_barrier->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - 
GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); -} - -void LocationsBuilderMIPS64::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret); - DataType::Type return_type = ret->InputAt(0)->GetType(); - locations->SetInAt(0, Mips64ReturnLocation(return_type)); -} - -void InstructionCodeGeneratorMIPS64::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) { - codegen_->GenerateFrameExit(); -} - -void LocationsBuilderMIPS64::VisitReturnVoid(HReturnVoid* ret) { - ret->SetLocations(nullptr); -} - -void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { - codegen_->GenerateFrameExit(); -} - -void LocationsBuilderMIPS64::VisitRor(HRor* ror) { - HandleShift(ror); -} - -void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) { - HandleShift(ror); -} - -void LocationsBuilderMIPS64::VisitShl(HShl* shl) { - HandleShift(shl); -} - -void InstructionCodeGeneratorMIPS64::VisitShl(HShl* shl) { - HandleShift(shl); -} - -void LocationsBuilderMIPS64::VisitShr(HShr* shr) { - HandleShift(shr); -} - -void InstructionCodeGeneratorMIPS64::VisitShr(HShr* shr) { - HandleShift(shr); -} - -void LocationsBuilderMIPS64::VisitSub(HSub* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitSub(HSub* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void LocationsBuilderMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); -} - -void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet( - HUnresolvedInstanceFieldGet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary( - instruction, instruction->GetFieldType(), calling_convention); -} - -void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldGet( - HUnresolvedInstanceFieldGet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldSet( - HUnresolvedInstanceFieldSet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary( - instruction, instruction->GetFieldType(), calling_convention); -} - -void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldSet( - HUnresolvedInstanceFieldSet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldGet( - HUnresolvedStaticFieldGet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary( - instruction, instruction->GetFieldType(), calling_convention); -} - 
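The selection logic removed above is compact but dense; as a reading aid, here is a minimal scalar C++ sketch (not ART code; the function names are invented for illustration) of what the deleted GenerateMinMaxInt, GenerateMinMaxFP and integer VisitAbs sequences compute, assuming two's-complement integers with arithmetic right shift.

// Illustrative-only sketch, not part of the removed code generator.
#include <cmath>
#include <cstdint>

// Branchless min mirroring Slt + Seleqz/Selnez + Or: exactly one of the two
// selected values is zero, so OR-ing them yields the surviving operand.
int64_t BranchlessMin(int64_t lhs, int64_t rhs) {
  int64_t lt = (rhs < lhs) ? 1 : 0;        // Slt AT, rhs, lhs
  int64_t keep_lhs = (lt == 0) ? lhs : 0;  // Seleqz out, lhs, AT
  int64_t keep_rhs = (lt != 0) ? rhs : 0;  // Selnez AT, rhs, AT
  return keep_lhs | keep_rhs;              // Or out, out, AT
}

// Java min/max must prefer a NaN, while the MIPS R6 min.fmt/max.fmt
// instructions prefer the numeric operand, hence the explicit NaN check
// before falling through to the hardware-style min.
double JavaStyleMinDouble(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {    // CmpUnD + Bc1eqz
    return std::isnan(a) ? a : b;          // CmpEqD + SelD picks the NaN
  }
  return std::fmin(a, b);                  // MinD on the NaN-free path
}

// Integer abs via a sign mask, mirroring Sra/Dsra32 + Xor + Subu/Dsubu.
// Like the hardware sequence, this wraps for the most negative value.
int32_t AbsViaSignMask(int32_t in) {
  uint32_t mask = static_cast<uint32_t>(in >> 31);  // 0 or 0xffffffff
  uint32_t u = static_cast<uint32_t>(in);
  return static_cast<int32_t>((u ^ mask) - mask);   // Xor, then Subu
}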
-void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldGet( - HUnresolvedStaticFieldGet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldSet( - HUnresolvedStaticFieldSet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->CreateUnresolvedFieldLocationSummary( - instruction, instruction->GetFieldType(), calling_convention); -} - -void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( - HUnresolvedStaticFieldSet* instruction) { - FieldAccessCallingConventionMIPS64 calling_convention; - codegen_->GenerateUnresolvedFieldAccess(instruction, - instruction->GetFieldType(), - instruction->GetFieldIndex(), - instruction->GetDexPc(), - calling_convention); -} - -void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnSlowPath); - // In suspend check slow path, usually there are no caller-save registers at all. - // If SIMD instructions are present, however, we force spilling all live SIMD - // registers in full width (since the runtime only saves/restores lower part). - locations->SetCustomSlowPathCallerSaves( - GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); -} - -void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { - HBasicBlock* block = instruction->GetBlock(); - if (block->GetLoopInformation() != nullptr) { - DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); - // The back edge will generate the suspend check. - return; - } - if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { - // The goto will generate the suspend check. 
- return; - } - GenerateSuspendCheck(instruction, nullptr); -} - -void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( - instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); -} - -void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { - DataType::Type input_type = conversion->GetInputType(); - DataType::Type result_type = conversion->GetResultType(); - DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) - << input_type << " -> " << result_type; - - if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || - (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { - LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - } - - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(conversion); - - if (DataType::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (DataType::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } -} - -void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conversion) { - LocationSummary* locations = conversion->GetLocations(); - DataType::Type result_type = conversion->GetResultType(); - DataType::Type input_type = conversion->GetInputType(); - - DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) - << input_type << " -> " << result_type; - - if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - - switch (result_type) { - case DataType::Type::kUint8: - __ Andi(dst, src, 0xFF); - break; - case DataType::Type::kInt8: - if (input_type == DataType::Type::kInt64) { - // Type conversion from long to types narrower than int is a result of code - // transformations. To avoid unpredictable results for SEB and SEH, we first - // need to sign-extend the low 32-bit value into bits 32 through 63. - __ Sll(dst, src, 0); - __ Seb(dst, dst); - } else { - __ Seb(dst, src); - } - break; - case DataType::Type::kUint16: - __ Andi(dst, src, 0xFFFF); - break; - case DataType::Type::kInt16: - if (input_type == DataType::Type::kInt64) { - // Type conversion from long to types narrower than int is a result of code - // transformations. To avoid unpredictable results for SEB and SEH, we first - // need to sign-extend the low 32-bit value into bits 32 through 63. 
- __ Sll(dst, src, 0); - __ Seh(dst, dst); - } else { - __ Seh(dst, src); - } - break; - case DataType::Type::kInt32: - case DataType::Type::kInt64: - // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int - // conversions, except when the input and output registers are the same and we are not - // converting longs to shorter types. In these cases, do nothing. - if ((input_type == DataType::Type::kInt64) || (dst != src)) { - __ Sll(dst, src, 0); - } - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - if (input_type == DataType::Type::kInt64) { - __ Dmtc1(src, FTMP); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsl(dst, FTMP); - } else { - __ Cvtdl(dst, FTMP); - } - } else { - __ Mtc1(src, FTMP); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsw(dst, FTMP); - } else { - __ Cvtdw(dst, FTMP); - } - } - } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { - CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - - if (result_type == DataType::Type::kInt64) { - if (input_type == DataType::Type::kFloat32) { - __ TruncLS(FTMP, src); - } else { - __ TruncLD(FTMP, src); - } - __ Dmfc1(dst, FTMP); - } else { - if (input_type == DataType::Type::kFloat32) { - __ TruncWS(FTMP, src); - } else { - __ TruncWD(FTMP, src); - } - __ Mfc1(dst, FTMP); - } - } else if (DataType::IsFloatingPointType(result_type) && - DataType::IsFloatingPointType(input_type)) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - if (result_type == DataType::Type::kFloat32) { - __ Cvtsd(dst, src); - } else { - __ Cvtds(dst, src); - } - } else { - LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type - << " to " << result_type; - } -} - -void LocationsBuilderMIPS64::VisitUShr(HUShr* ushr) { - HandleShift(ushr); -} - -void InstructionCodeGeneratorMIPS64::VisitUShr(HUShr* ushr) { - HandleShift(ushr); -} - -void LocationsBuilderMIPS64::VisitXor(HXor* instruction) { - HandleBinaryOp(instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitXor(HXor* instruction) { - HandleBinaryOp(instruction); -} - -void LocationsBuilderMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, this should be removed during prepare for register allocator. - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, this should be removed during prepare for register allocator. 
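As a reading aid for the Sll/Seb/Seh sequences in the type-conversion code above, here is a minimal C++ sketch (not ART code; names are invented) of the narrowing semantics being implemented: a long-to-byte or long-to-short conversion depends only on the low bits, so the value is first reduced to canonical sign-extended 32-bit form and then sign-extended from the narrow width, which is what the extra Sll(dst, src, 0) guarantees before SEB/SEH.

// Illustrative-only sketch, not part of the removed code generator.
#include <cstdint>

int8_t NarrowLongToByte(int64_t v) {
  int32_t low32 = static_cast<int32_t>(v);  // Sll dst, src, 0 (materialize int32)
  return static_cast<int8_t>(low32);        // Seb dst, dst
}

int16_t NarrowLongToShort(int64_t v) {
  int32_t low32 = static_cast<int32_t>(v);  // Sll dst, src, 0
  return static_cast<int16_t>(low32);       // Seh dst, dst
}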
- LOG(FATAL) << "Unreachable"; -} - -void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) { - HandleCondition(comp); -} - -void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - HandleCondition(comp); -} - -void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - HandleCondition(comp); -} - -// Simple implementation of packed switch - generate cascaded compare/jumps. -void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS64::GenPackedSwitchWithCompares(GpuRegister value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block) { - // Create a set of compare/jumps. - GpuRegister temp_reg = TMP; - __ Addiu32(temp_reg, value_reg, -lower_bound); - // Jump to default if index is negative - // Note: We don't check the case that index is positive while value < lower_bound, because in - // this case, index >= num_entries must be true. So that we can save one branch instruction. - __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); - - const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); - // Jump to successors[0] if value == lower_bound. - __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); - int32_t last_index = 0; - for (; num_entries - last_index > 2; last_index += 2) { - __ Addiu(temp_reg, temp_reg, -2); - // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. - __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); - // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. 
- __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); - } - if (num_entries - last_index == 2) { - // The last missing case_value. - __ Addiu(temp_reg, temp_reg, -1); - __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); - } - - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_block, default_block)) { - __ Bc(codegen_->GetLabelOf(default_block)); - } -} - -void InstructionCodeGeneratorMIPS64::GenTableBasedPackedSwitch(GpuRegister value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block) { - // Create a jump table. - std::vector<Mips64Label*> labels(num_entries); - const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - labels[i] = codegen_->GetLabelOf(successors[i]); - } - JumpTable* table = __ CreateJumpTable(std::move(labels)); - - // Is the value in range? - __ Addiu32(TMP, value_reg, -lower_bound); - __ LoadConst32(AT, num_entries); - __ Bgeuc(TMP, AT, codegen_->GetLabelOf(default_block)); - - // We are in the range of the table. - // Load the target address from the jump table, indexing by the value. - __ LoadLabelAddress(AT, table->GetLabel()); - __ Dlsa(TMP, TMP, AT, 2); - __ Lw(TMP, TMP, 0); - // Compute the absolute target address by adding the table start address - // (the table contains offsets to targets relative to its start). - __ Daddu(TMP, TMP, AT); - // And jump. - __ Jr(TMP); - __ Nop(); -} - -void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - uint32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); - HBasicBlock* switch_block = switch_instr->GetBlock(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - - if (num_entries > kPackedSwitchJumpTableThreshold) { - GenTableBasedPackedSwitch(value_reg, - lower_bound, - num_entries, - switch_block, - default_block); - } else { - GenPackedSwitchWithCompares(value_reg, - lower_bound, - num_entries, - switch_block, - default_block); - } -} - -void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) { - LocationSummary* locations = - new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { - uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( - instruction->GetIndex(), kMips64PointerSize).SizeValue(); - __ LoadFromOffset(kLoadDoubleword, - locations->Out().AsRegister<GpuRegister>(), - locations->InAt(0).AsRegister<GpuRegister>(), - method_offset); - } else { - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kMips64PointerSize)); - __ LoadFromOffset(kLoadDoubleword, - locations->Out().AsRegister<GpuRegister>(), - locations->InAt(0).AsRegister<GpuRegister>(), - mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value()); - __ LoadFromOffset(kLoadDoubleword, - locations->Out().AsRegister<GpuRegister>(), - locations->Out().AsRegister<GpuRegister>(), - 
method_offset); - } -} - -void LocationsBuilderMIPS64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void InstructionCodeGeneratorMIPS64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -} // namespace mips64 -} // namespace art diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h deleted file mode 100644 index 52f3a62f33..0000000000 --- a/compiler/optimizing/code_generator_mips64.h +++ /dev/null @@ -1,693 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ -#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ - -#include "code_generator.h" -#include "dex/type_reference.h" -#include "driver/compiler_options.h" -#include "nodes.h" -#include "parallel_move_resolver.h" -#include "utils/mips64/assembler_mips64.h" - -namespace art { -namespace mips64 { - -// InvokeDexCallingConvention registers - -static constexpr GpuRegister kParameterCoreRegisters[] = - { A1, A2, A3, A4, A5, A6, A7 }; -static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - -static constexpr FpuRegister kParameterFpuRegisters[] = - { F13, F14, F15, F16, F17, F18, F19 }; -static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); - - -// InvokeRuntimeCallingConvention registers - -static constexpr GpuRegister kRuntimeParameterCoreRegisters[] = - { A0, A1, A2, A3, A4, A5, A6, A7 }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); - -static constexpr FpuRegister kRuntimeParameterFpuRegisters[] = - { F12, F13, F14, F15, F16, F17, F18, F19 }; -static constexpr size_t kRuntimeParameterFpuRegistersLength = - arraysize(kRuntimeParameterFpuRegisters); - - -static constexpr GpuRegister kCoreCalleeSaves[] = - { S0, S1, S2, S3, S4, S5, S6, S7, GP, S8, RA }; -static constexpr FpuRegister kFpuCalleeSaves[] = - { F24, F25, F26, F27, F28, F29, F30, F31 }; - - -class CodeGeneratorMIPS64; - -VectorRegister VectorRegisterFrom(Location location); - -class InvokeDexCallingConvention : public CallingConvention<GpuRegister, FpuRegister> { - public: - InvokeDexCallingConvention() - : CallingConvention(kParameterCoreRegisters, - kParameterCoreRegistersLength, - kParameterFpuRegisters, - kParameterFpuRegistersLength, - kMips64PointerSize) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); -}; - -class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventionVisitor { - public: - InvokeDexCallingConventionVisitorMIPS64() {} - virtual ~InvokeDexCallingConventionVisitorMIPS64() {} - - Location GetNextLocation(DataType::Type type) override; - Location GetReturnLocation(DataType::Type type) const override; - Location GetMethodLocation() const override; - - private: - 
InvokeDexCallingConvention calling_convention; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorMIPS64); -}; - -class InvokeRuntimeCallingConvention : public CallingConvention<GpuRegister, FpuRegister> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength, - kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength, - kMips64PointerSize) {} - - Location GetReturnLocation(DataType::Type return_type); - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - -class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { - public: - FieldAccessCallingConventionMIPS64() {} - - Location GetObjectLocation() const override { - return Location::RegisterLocation(A1); - } - Location GetFieldIndexLocation() const override { - return Location::RegisterLocation(A0); - } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { - return Location::RegisterLocation(V0); - } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, - bool is_instance) const override { - return is_instance - ? Location::RegisterLocation(A2) - : Location::RegisterLocation(A1); - } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { - return Location::FpuRegisterLocation(F0); - } - - private: - DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionMIPS64); -}; - -class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { - public: - ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen) - : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - - void EmitMove(size_t index) override; - void EmitSwap(size_t index) override; - void SpillScratch(int reg) override; - void RestoreScratch(int reg) override; - - void Exchange(int index1, int index2, bool double_slot); - void ExchangeQuadSlots(int index1, int index2); - - Mips64Assembler* GetAssembler() const; - - private: - CodeGeneratorMIPS64* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverMIPS64); -}; - -class SlowPathCodeMIPS64 : public SlowPathCode { - public: - explicit SlowPathCodeMIPS64(HInstruction* instruction) - : SlowPathCode(instruction), entry_label_(), exit_label_() {} - - Mips64Label* GetEntryLabel() { return &entry_label_; } - Mips64Label* GetExitLabel() { return &exit_label_; } - - private: - Mips64Label entry_label_; - Mips64Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); -}; - -class LocationsBuilderMIPS64 : public HGraphVisitor { - public: - LocationsBuilderMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) - : HGraphVisitor(graph), codegen_(codegen) {} - -#define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) override; - - FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) - -#undef DECLARE_VISIT_INSTRUCTION - - void VisitInstruction(HInstruction* instruction) override { - LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() - << " (id " << instruction->GetId() << ")"; - } - - private: - void HandleInvoke(HInvoke* invoke); - void HandleBinaryOp(HBinaryOperation* operation); - void HandleCondition(HCondition* instruction); - void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); - void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); - Location 
RegisterOrZeroConstant(HInstruction* instruction); - Location FpuRegisterOrConstantForStore(HInstruction* instruction); - - InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_; - - CodeGeneratorMIPS64* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64); -}; - -class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { - public: - InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); - -#define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) override; - - FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) - -#undef DECLARE_VISIT_INSTRUCTION - - void VisitInstruction(HInstruction* instruction) override { - LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() - << " (id " << instruction->GetId() << ")"; - } - - Mips64Assembler* GetAssembler() const { return assembler_; } - - // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit - // instructions for N cases. - // Table-based packed switch generates approx. 11 32-bit instructions - // and N 32-bit data words for N cases. - // At N = 6 they come out as 18 and 17 32-bit words respectively. - // We switch to the table-based method starting with 7 cases. - static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; - - void GenerateMemoryBarrier(MemBarrierKind kind); - - private: - void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); - void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); - void HandleBinaryOp(HBinaryOperation* operation); - void HandleCondition(HCondition* instruction); - void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, - const FieldInfo& field_info, - bool value_can_be_null); - void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); - - void GenerateMinMaxInt(LocationSummary* locations, bool is_min); - void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); - void GenerateMinMax(HBinaryOperation* minmax, bool is_min); - - // Generate a heap reference load using one register `out`: - // - // out <- *(out + offset) - // - // while honoring heap poisoning and/or read barriers (if any). - // - // Location `maybe_temp` is used when generating a read barrier and - // shall be a register in that case; it may be an invalid location - // otherwise. - void GenerateReferenceLoadOneRegister(HInstruction* instruction, - Location out, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option); - // Generate a heap reference load using two different registers - // `out` and `obj`: - // - // out <- *(obj + offset) - // - // while honoring heap poisoning and/or read barriers (if any). - // - // Location `maybe_temp` is used when generating a Baker's (fast - // path) read barrier and shall be a register in that case; it may - // be an invalid location otherwise. - void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, - Location out, - Location obj, - uint32_t offset, - Location maybe_temp, - ReadBarrierOption read_barrier_option); - - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers (if any). 
- void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - GpuRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option, - Mips64Label* label_low = nullptr); - - void GenerateTestAndBranch(HInstruction* instruction, - size_t condition_input_index, - Mips64Label* true_target, - Mips64Label* false_target); - void DivRemOneOrMinusOne(HBinaryOperation* instruction); - void DivRemByPowerOfTwo(HBinaryOperation* instruction); - void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); - void GenerateDivRemIntegral(HBinaryOperation* instruction); - void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); - // When the function returns `false` it means that the condition holds if `dst` is non-zero - // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero - // `dst` are exchanged. - bool MaterializeIntLongCompare(IfCondition cond, - bool is64bit, - LocationSummary* input_locations, - GpuRegister dst); - void GenerateIntLongCompareAndBranch(IfCondition cond, - bool is64bit, - LocationSummary* locations, - Mips64Label* label); - void GenerateFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations); - // When the function returns `false` it means that the condition holds if `dst` is non-zero - // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero - // `dst` are exchanged. - bool MaterializeFpCompare(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* input_locations, - FpuRegister dst); - void GenerateFpCompareAndBranch(IfCondition cond, - bool gt_bias, - DataType::Type type, - LocationSummary* locations, - Mips64Label* label); - void HandleGoto(HInstruction* got, HBasicBlock* successor); - void GenPackedSwitchWithCompares(GpuRegister value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block); - void GenTableBasedPackedSwitch(GpuRegister value_reg, - int32_t lower_bound, - uint32_t num_entries, - HBasicBlock* switch_block, - HBasicBlock* default_block); - int32_t VecAddress(LocationSummary* locations, - size_t size, - /* out */ GpuRegister* adjusted_base); - void GenConditionalMove(HSelect* select); - - Mips64Assembler* const assembler_; - CodeGeneratorMIPS64* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorMIPS64); -}; - -class CodeGeneratorMIPS64 : public CodeGenerator { - public: - CodeGeneratorMIPS64(HGraph* graph, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats = nullptr); - virtual ~CodeGeneratorMIPS64() {} - - void GenerateFrameEntry() override; - void GenerateFrameExit() override; - - void Bind(HBasicBlock* block) override; - - size_t GetWordSize() const override { return kMips64DoublewordSize; } - - size_t GetFloatingPointSpillSlotSize() const override { - return GetGraph()->HasSIMD() - ? 2 * kMips64DoublewordSize // 16 bytes for each spill. - : 1 * kMips64DoublewordSize; // 8 bytes for each spill. - } - - uintptr_t GetAddressOf(HBasicBlock* block) override { - return assembler_.GetLabelLocation(GetLabelOf(block)); - } - - HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - Mips64Assembler* GetAssembler() override { return &assembler_; } - const Mips64Assembler& GetAssembler() const override { return assembler_; } - - // Emit linker patches. 
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; - - // Fast path implementation of ReadBarrier::Barrier for a heap - // reference field load when Baker's read barriers are used. - void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t offset, - Location temp, - bool needs_null_check); - // Fast path implementation of ReadBarrier::Barrier for a heap - // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t data_offset, - Location index, - Location temp, - bool needs_null_check); - - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - GpuRegister obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check, - bool always_update_field = false); - - // Generate a read barrier for a heap reference within `instruction` - // using a slow path. - // - // A read barrier for an object reference read from the heap is - // implemented as a call to the artReadBarrierSlow runtime entry - // point, which is passed the values in locations `ref`, `obj`, and - // `offset`: - // - // mirror::Object* artReadBarrierSlow(mirror::Object* ref, - // mirror::Object* obj, - // uint32_t offset); - // - // The `out` location contains the value returned by - // artReadBarrierSlow. - // - // When `index` is provided (i.e. for array accesses), the offset - // value passed to artReadBarrierSlow is adjusted to take `index` - // into account. - void GenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap - // reference using a slow path. If heap poisoning is enabled, also - // unpoison the reference in `out`. - void MaybeGenerateReadBarrierSlow(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction` using - // a slow path. - // - // A read barrier for an object reference GC root is implemented as - // a call to the artReadBarrierForRootSlow runtime entry point, - // which is passed the value in location `root`: - // - // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); - // - // The `out` location contains the value returned by - // artReadBarrierForRootSlow. - void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - - void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null); - - // Register allocation. 
- - void SetupBlockedRegisters() const override; - - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - - void DumpCoreRegister(std::ostream& stream, int reg) const override; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - - InstructionSet GetInstructionSet() const override { return InstructionSet::kMips64; } - - const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const; - - Mips64Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Mips64Label>(block_labels_, block); - } - - void Initialize() override { - block_labels_ = CommonInitializeLabels<Mips64Label>(); - } - - // We prefer aligned loads and stores (less code), so spill and restore registers in slow paths - // at aligned locations. - uint32_t GetPreferredSlotsAlignment() const override { return kMips64DoublewordSize; } - - void Finalize(CodeAllocator* allocator) override; - - // Code generation helpers. - void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - - void MoveConstant(Location destination, int32_t value) override; - - void AddLocationAsTemp(Location location, LocationSummary* locations) override; - - - void SwapLocations(Location loc1, Location loc2, DataType::Type type); - - // Generate code to invoke a runtime entry point. - void InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) override; - - // Generate code to invoke a runtime entry point, but do not record - // PC-related information in a stack map. - void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, - HInstruction* instruction, - SlowPathCode* slow_path); - - void GenerateInvokeRuntime(int32_t entry_point_offset); - - ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } - - // Check if the desired_string_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) override; - - // Check if the desired_class_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) override; - - // Check if the desired_dispatch_info is supported. If it is, return it, - // otherwise return a fall-back info that should be used instead. 
- HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method) override; - - void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - - void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) override { - UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; - } - - void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) override; - void GenerateExplicitNullCheck(HNullCheck* instruction) override; - - // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, - // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. - // - // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating - // two patches/infos. There can be more than two patches/infos if the instruction supplying - // the high half is shared with e.g. a slow path, while the low half is supplied by separate - // instructions, e.g.: - // auipc r1, high // patch - // lwu r2, low(r1) // patch - // beqzc r2, slow_path - // back: - // ... - // slow_path: - // ... - // sw r2, low(r1) // patch - // bc back - struct PcRelativePatchInfo : PatchInfo<Mips64Label> { - PcRelativePatchInfo(const DexFile* dex_file, - uint32_t off_or_idx, - const PcRelativePatchInfo* info_high) - : PatchInfo<Mips64Label>(dex_file, off_or_idx), - patch_info_high(info_high) { } - - // Pointer to the info for the high half patch or nullptr if this is the high half patch info. - const PcRelativePatchInfo* patch_info_high; - - private: - PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete; - DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); - }; - - PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); - Literal* DeduplicateBootImageAddressLiteral(uint64_t address); - - void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, - GpuRegister out, - PcRelativePatchInfo* info_low = nullptr); - - void LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference); - void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - - void PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - const Literal* literal, - uint64_t 
index_in_table) const; - Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index, - Handle<mirror::String> handle); - Literal* DeduplicateJitClassLiteral(const DexFile& dex_file, - dex::TypeIndex type_index, - Handle<mirror::Class> handle); - - private: - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; - using StringToLiteralMap = ArenaSafeMap<StringReference, - Literal*, - StringReferenceValueComparator>; - using TypeToLiteralMap = ArenaSafeMap<TypeReference, - Literal*, - TypeReferenceValueComparator>; - - Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - Literal* DeduplicateUint64Literal(uint64_t value); - - PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, - uint32_t offset_or_index, - const PcRelativePatchInfo* info_high, - ArenaDeque<PcRelativePatchInfo>* patches); - - template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> - void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<linker::LinkerPatch>* linker_patches); - - // Labels for each block that will be compiled. - Mips64Label* block_labels_; // Indexed by block id. - Mips64Label frame_entry_label_; - LocationsBuilderMIPS64 location_builder_; - InstructionCodeGeneratorMIPS64 instruction_visitor_; - ParallelMoveResolverMIPS64 move_resolver_; - Mips64Assembler assembler_; - - // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // Deduplication map for 64-bit literals, used for non-patchable method address or method code - // address. - Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). - ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; - // PC-relative method patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; - // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; - // PC-relative type patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; - // PC-relative type patch info for kBssEntry. - ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; - - // Patches for string root accesses in JIT compiled code. - StringToLiteralMap jit_string_patches_; - // Patches for class root accesses in JIT compiled code. - TypeToLiteralMap jit_class_patches_; - - DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64); -}; - -} // namespace mips64 -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc deleted file mode 100644 index 4e9ba0d3d2..0000000000 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ /dev/null @@ -1,1430 +0,0 @@ -/* - * Copyright (C) 2017 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "code_generator_mips.h" -#include "mirror/array-inl.h" - -namespace art { -namespace mips { - -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. -#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT - -void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, locations->InAt(0).AsRegister<Register>()); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, locations->InAt(0).AsRegister<Register>()); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, locations->InAt(0).AsRegister<Register>()); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ InsertW(static_cast<VectorRegister>(FTMP), - locations->InAt(0).AsRegisterPairLow<Register>(), - 0); - __ InsertW(static_cast<VectorRegister>(FTMP), - locations->InAt(0).AsRegisterPairHigh<Register>(), - 1); - __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double= */ true); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ ReplicateFPToVectorRegister(dst, - locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double= */ false); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ ReplicateFPToVectorRegister(dst, - locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double= */ true); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: 
- case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Copy_sW(locations->Out().AsRegister<Register>(), src, 0); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Copy_sW(locations->Out().AsRegisterPairLow<Register>(), src, 0); - __ Copy_sW(locations->Out().AsRegisterPairHigh<Register>(), src, 1); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 4u); - DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - DataType::Type type = instruction->GetPackedType(); - switch (type) { - case DataType::Type::kBool: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), - instruction->IsVecNot() ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs() || - (instruction->IsVecReduce() && type == DataType::Type::kInt64)) - ? 
Location::kOutputOverlap - : Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - VectorRegister tmp = static_cast<VectorRegister>(FTMP); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetReductionKind()) { - case HVecReduce::kSum: - __ Hadd_sD(tmp, src, src); - __ IlvlD(dst, tmp, tmp); - __ AddvW(dst, dst, tmp); - break; - case HVecReduce::kMin: - __ IlvodW(tmp, src, src); - __ Min_sW(tmp, src, tmp); - __ IlvlW(dst, tmp, tmp); - __ Min_sW(dst, dst, tmp); - break; - case HVecReduce::kMax: - __ IlvodW(tmp, src, src); - __ Max_sW(tmp, src, tmp); - __ IlvlW(dst, tmp, tmp); - __ Max_sW(dst, dst, tmp); - break; - } - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetReductionKind()) { - case HVecReduce::kSum: - __ IlvlD(dst, src, src); - __ AddvD(dst, dst, src); - break; - case HVecReduce::kMin: - __ IlvlD(dst, src, src); - __ Min_sD(dst, dst, src); - break; - case HVecReduce::kMax: - __ IlvlD(dst, src, src); - __ Max_sD(dst, dst, src); - break; - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - DataType::Type from = instruction->GetInputType(); - DataType::Type to = instruction->GetResultType(); - if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Ffint_sW(dst, src); - } else { - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - } -} - -void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, ZERO); - __ SubvB(dst, dst, src); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, ZERO); - __ SubvH(dst, dst, src); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ SubvW(dst, dst, src); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ SubvD(dst, dst, src); - break; - case 
DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ FsubW(dst, dst, src); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ FsubD(dst, dst, src); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, ZERO); // all zeroes - __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, ZERO); // all zeroes - __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); // all zeroes - __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); // all zeroes - __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ LdiW(dst, -1); // all ones - __ SrliW(dst, dst, 1); - __ AndV(dst, dst, src); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ LdiD(dst, -1); // all ones - __ SrliD(dst, dst, 1); - __ AndV(dst, dst, src); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: // special case boolean-not - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ LdiB(dst, 1); - __ XorV(dst, dst, src); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ NorV(dst, src, src); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector binary operations. 
-static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ AddvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ AddvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ AddvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ AddvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FaddW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FaddD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { - LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); -} - -void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_uB(dst, lhs, rhs) - : __ Ave_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_sB(dst, lhs, rhs) - : __ Ave_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - instruction->IsRounded() - ? 
__ Aver_uH(dst, lhs, rhs) - : __ Ave_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_sH(dst, lhs, rhs) - : __ Ave_sH(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SubvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SubvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SubvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SubvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FsubW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FsubD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { - LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); -} - -void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ MulvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ MulvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ MulvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ MulvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FmulW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FmulD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - 
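For reference, the VisitVecHalvingAdd lowering above picks the MSA Aver_ (rounded) or Ave_ (truncating) form based on HVecHalvingAdd::IsRounded(). A minimal standalone C++ sketch of the per-lane arithmetic these instructions are assumed to implement, namely (a + b + rounding_bit) >> 1 computed in a wider type so the intermediate sum cannot overflow, is shown below; it is illustrative only and not part of the deleted file.

#include <cstdint>

// Scalar reference for one lane of a halving add. The exact MSA semantics are
// assumed here (truncating form rounds toward negative infinity for signed
// lanes), not quoted from the architecture manual.
inline uint8_t HalvingAddU8(uint8_t a, uint8_t b, bool rounded) {
  uint32_t sum = static_cast<uint32_t>(a) + b + (rounded ? 1u : 0u);
  return static_cast<uint8_t>(sum >> 1);  // Per-lane result of Ave_uB / Aver_uB.
}

inline int16_t HalvingAddS16(int16_t a, int16_t b, bool rounded) {
  int32_t sum = static_cast<int32_t>(a) + b + (rounded ? 1 : 0);
  return static_cast<int16_t>(sum >> 1);  // Per-lane result of Ave_sH / Aver_sH.
}

Only the 8-bit and 16-bit lane widths are handled by the switch above, which is why no W/D variants appear for halving add.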
-void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FdivW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FdivD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Min_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Min_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Min_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Min_sH(dst, lhs, rhs); - break; - case DataType::Type::kUint32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Min_uW(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Min_sW(dst, lhs, rhs); - break; - case DataType::Type::kUint64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Min_uD(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Min_sD(dst, lhs, rhs); - break; - // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. - // TODO: Fix min(x, NaN) cases for float and double. 
- case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FminW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FminD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Max_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Max_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Max_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Max_sH(dst, lhs, rhs); - break; - case DataType::Type::kUint32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Max_uW(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Max_sW(dst, lhs, rhs); - break; - case DataType::Type::kUint64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Max_uD(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Max_sD(dst, lhs, rhs); - break; - // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. - // TODO: Fix max(x, NaN) cases for float and double. 
- case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FmaxW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FmaxD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ AndV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ OrV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - 
DCHECK_LE(instruction->GetVectorLength(), 16u); - __ XorV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector shift operations. -static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SlliB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SlliH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SlliW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SlliD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SraiB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SraiH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SraiW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SraiD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void 
InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SrliB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SrliH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SrliW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SrliD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - - DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented - - HInstruction* input = instruction->InputAt(0); - bool is_zero = IsZeroBitPattern(input); - - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) - : Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) - : Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - - DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented - - // Zero out all other elements first. - __ FillW(dst, ZERO); - - // Shorthand for any type of zero. - if (IsZeroBitPattern(instruction->InputAt(0))) { - return; - } - - // Set required elements. 
- switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ InsertB(dst, locations->InAt(0).AsRegister<Register>(), 0); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ InsertH(dst, locations->InAt(0).AsRegister<Register>(), 0); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ InsertW(dst, locations->InAt(0).AsRegister<Register>(), 0); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ InsertW(dst, locations->InAt(0).AsRegisterPairLow<Register>(), 0); - __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector accumulations. -static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetInAt(2, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { - CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); - VectorRegister left = VectorRegisterFrom(locations->InAt(1)); - VectorRegister right = VectorRegisterFrom(locations->InAt(2)); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvB(acc, left, right); - } else { - __ MsubvB(acc, left, right); - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvH(acc, left, right); - } else { - __ MsubvH(acc, left, right); - } - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvW(acc, left, right); - } else { - __ MsubvW(acc, left, right); - } - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvD(acc, left, right); - } else { - __ MsubvD(acc, left, right); - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { - CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); - LocationSummary* locations = 
instruction->GetLocations(); - // All conversions require at least one temporary register. - locations->AddTemp(Location::RequiresFpuRegister()); - // Some conversions require a second temporary register. - HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); - HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); - DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), - HVecOperation::ToSignedType(b->GetPackedType())); - switch (a->GetPackedType()) { - case DataType::Type::kInt32: - if (instruction->GetPackedType() == DataType::Type::kInt32) { - break; - } - FALLTHROUGH_INTENDED; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - locations->AddTemp(Location::RequiresFpuRegister()); - break; - default: - break; - } -} - -void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); - VectorRegister left = VectorRegisterFrom(locations->InAt(1)); - VectorRegister right = VectorRegisterFrom(locations->InAt(2)); - VectorRegister tmp = static_cast<VectorRegister>(FTMP); - VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); - - DCHECK(locations->InAt(0).Equals(locations->Out())); - - // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). - HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); - HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); - DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), - HVecOperation::ToSignedType(b->GetPackedType())); - switch (a->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint16: - case DataType::Type::kInt16: { - DCHECK_EQ(8u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ AddvH(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ AddvH(acc, acc, tmp1); - break; - } - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ AddvW(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - 
DCHECK_EQ(8u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillH(tmp, ZERO); - __ Hadd_sW(tmp1, left, tmp); - __ Hadd_sW(tmp2, right, tmp); - __ Asub_sW(tmp1, tmp1, tmp2); - __ AddvW(acc, acc, tmp1); - __ Hadd_sW(tmp1, tmp, left); - __ Hadd_sW(tmp2, tmp, right); - __ Asub_sW(tmp1, tmp1, tmp2); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillH(tmp, ZERO); - __ Hadd_sW(tmp1, left, tmp); - __ Hadd_sW(tmp2, right, tmp); - __ Asub_sW(tmp1, tmp1, tmp2); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - __ Hadd_sW(tmp1, tmp, left); - __ Hadd_sW(tmp2, tmp, right); - __ Asub_sW(tmp1, tmp1, tmp2); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(tmp, ZERO); - __ SubvW(tmp1, left, right); - __ Add_aW(tmp1, tmp1, tmp); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillW(tmp, ZERO); - __ Hadd_sD(tmp1, left, tmp); - __ Hadd_sD(tmp2, right, tmp); - __ Asub_sD(tmp1, tmp1, tmp2); - __ AddvD(acc, acc, tmp1); - __ Hadd_sD(tmp1, tmp, left); - __ Hadd_sD(tmp2, tmp, right); - __ Asub_sD(tmp1, tmp1, tmp2); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kInt64: { - DCHECK_EQ(2u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillW(tmp, ZERO); - __ SubvD(tmp1, left, right); - __ Add_aD(tmp1, tmp1, tmp); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -// Helper to set up locations for vector memory operations. 
-static void CreateVecMemLocations(ArenaAllocator* allocator, - HVecMemoryOperation* instruction, - bool is_load) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (is_load) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to prepare register and offset for vector memory operations. Returns the offset and sets -// the output parameter adjusted_base to the original base or to a reserved temporary register (AT). -int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations, - size_t size, - /* out */ Register* adjusted_base) { - Register base = locations->InAt(0).AsRegister<Register>(); - Location index = locations->InAt(1); - int scale = TIMES_1; - switch (size) { - case 2: scale = TIMES_2; break; - case 4: scale = TIMES_4; break; - case 8: scale = TIMES_8; break; - default: break; - } - int32_t offset = mirror::Array::DataOffset(size).Int32Value(); - - if (index.IsConstant()) { - offset += index.GetConstant()->AsIntConstant()->GetValue() << scale; - __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale); - *adjusted_base = base; - } else { - Register index_reg = index.AsRegister<Register>(); - if (scale != TIMES_1) { - __ Lsa(AT, index_reg, base, scale); - } else { - __ Addu(AT, base, index_reg); - } - *adjusted_base = AT; - } - return offset; -} - -void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); -} - -void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t size = DataType::Size(instruction->GetPackedType()); - VectorRegister reg = VectorRegisterFrom(locations->Out()); - Register base; - int32_t offset = VecAddress(locations, size, &base); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ LdB(reg, base, offset); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - // Loading 8-bytes (needed if dealing with compressed strings in StringCharAt) from unaligned - // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned - // loads and stores. - // TODO: Implement support for StringCharAt. 
- DCHECK(!instruction->IsStringCharAt()); - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ LdH(reg, base, offset); - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ LdW(reg, base, offset); - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ LdD(reg, base, offset); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); -} - -void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t size = DataType::Size(instruction->GetPackedType()); - VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); - Register base; - int32_t offset = VecAddress(locations, size, &base); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ StB(reg, base, offset); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ StH(reg, base, offset); - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ StW(reg, base, offset); - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ StD(reg, base, offset); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -#undef __ - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc deleted file mode 100644 index 6467d3e27f..0000000000 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ /dev/null @@ -1,1428 +0,0 @@ -/* - * Copyright (C) 2017 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "code_generator_mips64.h" -#include "mirror/array-inl.h" - -namespace art { -namespace mips64 { - -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
-#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT - -VectorRegister VectorRegisterFrom(Location location) { - DCHECK(location.IsFpuRegister()); - return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>()); -} - -void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>()); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>()); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>()); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>()); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ ReplicateFPToVectorRegister(dst, - locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double= */ false); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ ReplicateFPToVectorRegister(dst, - locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double= */ true); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* 
instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Copy_sW(locations->Out().AsRegister<GpuRegister>(), src, 0); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Copy_sD(locations->Out().AsRegister<GpuRegister>(), src, 0); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 4u); - DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - DataType::Type type = instruction->GetPackedType(); - switch (type) { - case DataType::Type::kBool: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), - instruction->IsVecNot() ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs() || - (instruction->IsVecReduce() && type == DataType::Type::kInt64)) - ? 
Location::kOutputOverlap - : Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - VectorRegister tmp = static_cast<VectorRegister>(FTMP); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetReductionKind()) { - case HVecReduce::kSum: - __ Hadd_sD(tmp, src, src); - __ IlvlD(dst, tmp, tmp); - __ AddvW(dst, dst, tmp); - break; - case HVecReduce::kMin: - __ IlvodW(tmp, src, src); - __ Min_sW(tmp, src, tmp); - __ IlvlW(dst, tmp, tmp); - __ Min_sW(dst, dst, tmp); - break; - case HVecReduce::kMax: - __ IlvodW(tmp, src, src); - __ Max_sW(tmp, src, tmp); - __ IlvlW(dst, tmp, tmp); - __ Max_sW(dst, dst, tmp); - break; - } - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetReductionKind()) { - case HVecReduce::kSum: - __ IlvlD(dst, src, src); - __ AddvD(dst, dst, src); - break; - case HVecReduce::kMin: - __ IlvlD(dst, src, src); - __ Min_sD(dst, dst, src); - break; - case HVecReduce::kMax: - __ IlvlD(dst, src, src); - __ Max_sD(dst, dst, src); - break; - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - DataType::Type from = instruction->GetInputType(); - DataType::Type to = instruction->GetResultType(); - if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Ffint_sW(dst, src); - } else { - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, ZERO); - __ SubvB(dst, dst, src); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, ZERO); - __ SubvH(dst, dst, src); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ SubvW(dst, dst, src); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillD(dst, ZERO); - __ SubvD(dst, dst, 
src); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); - __ FsubW(dst, dst, src); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillD(dst, ZERO); - __ FsubD(dst, dst, src); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ FillB(dst, ZERO); // all zeroes - __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ FillH(dst, ZERO); // all zeroes - __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(dst, ZERO); // all zeroes - __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillD(dst, ZERO); // all zeroes - __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ LdiW(dst, -1); // all ones - __ SrliW(dst, dst, 1); - __ AndV(dst, dst, src); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ LdiD(dst, -1); // all ones - __ SrliD(dst, dst, 1); - __ AndV(dst, dst, src); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister src = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: // special case boolean-not - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ LdiB(dst, 1); - __ XorV(dst, dst, src); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ NorV(dst, src, src); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector binary operations. 
-static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ AddvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ AddvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ AddvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ AddvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FaddW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FaddD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { - LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); -} - -void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_uB(dst, lhs, rhs) - : __ Ave_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_sB(dst, lhs, rhs) - : __ Ave_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - instruction->IsRounded() - ? 
__ Aver_uH(dst, lhs, rhs) - : __ Ave_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - instruction->IsRounded() - ? __ Aver_sH(dst, lhs, rhs) - : __ Ave_sH(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SubvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SubvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SubvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SubvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FsubW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FsubD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { - LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); -} - -void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ MulvB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ MulvH(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ MulvW(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ MulvD(dst, lhs, rhs); - break; - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FmulW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FmulD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), 
instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FdivW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FdivD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Min_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Min_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Min_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Min_sH(dst, lhs, rhs); - break; - case DataType::Type::kUint32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Min_uW(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Min_sW(dst, lhs, rhs); - break; - case DataType::Type::kUint64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Min_uD(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Min_sD(dst, lhs, rhs); - break; - // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. - // TODO: Fix min(x, NaN) cases for float and double. 
- case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FminW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FminD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Max_uB(dst, lhs, rhs); - break; - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Max_sB(dst, lhs, rhs); - break; - case DataType::Type::kUint16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Max_uH(dst, lhs, rhs); - break; - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Max_sH(dst, lhs, rhs); - break; - case DataType::Type::kUint32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Max_uW(dst, lhs, rhs); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Max_sW(dst, lhs, rhs); - break; - case DataType::Type::kUint64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Max_uD(dst, lhs, rhs); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Max_sD(dst, lhs, rhs); - break; - // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. - // TODO: Fix max(x, NaN) cases for float and double. 
- case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FmaxW(dst, lhs, rhs); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FmaxD(dst, lhs, rhs); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ AndV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ OrV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_LE(2u, 
instruction->GetVectorLength()); - DCHECK_LE(instruction->GetVectorLength(), 16u); - __ XorV(dst, lhs, rhs); // lanes do not matter - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector shift operations. -static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SlliB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SlliH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SlliW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SlliD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SraiB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SraiH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SraiW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SraiD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); -} - -void 
InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ SrliB(dst, lhs, value); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ SrliH(dst, lhs, value); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ SrliW(dst, lhs, value); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ SrliD(dst, lhs, value); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - - DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented - - HInstruction* input = instruction->InputAt(0); - bool is_zero = IsZeroBitPattern(input); - - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) - : Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) - : Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister dst = VectorRegisterFrom(locations->Out()); - - DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented - - // Zero out all other elements first. - __ FillW(dst, ZERO); - - // Shorthand for any type of zero. - if (IsZeroBitPattern(instruction->InputAt(0))) { - return; - } - - // Set required elements. 
- switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ InsertB(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ InsertH(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ InsertW(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to set up locations for vector accumulations. -static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetInAt(2, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { - CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); -} - -void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); - VectorRegister left = VectorRegisterFrom(locations->InAt(1)); - VectorRegister right = VectorRegisterFrom(locations->InAt(2)); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvB(acc, left, right); - } else { - __ MsubvB(acc, left, right); - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvH(acc, left, right); - } else { - __ MsubvH(acc, left, right); - } - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvW(acc, left, right); - } else { - __ MsubvW(acc, left, right); - } - break; - case DataType::Type::kInt64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->GetOpKind() == HInstruction::kAdd) { - __ MaddvD(acc, left, right); - } else { - __ MsubvD(acc, left, right); - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { - CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); - LocationSummary* locations = instruction->GetLocations(); - // All conversions require at 
least one temporary register. - locations->AddTemp(Location::RequiresFpuRegister()); - // Some conversions require a second temporary register. - HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); - HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); - DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), - HVecOperation::ToSignedType(b->GetPackedType())); - switch (a->GetPackedType()) { - case DataType::Type::kInt32: - if (instruction->GetPackedType() == DataType::Type::kInt32) { - break; - } - FALLTHROUGH_INTENDED; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - locations->AddTemp(Location::RequiresFpuRegister()); - break; - default: - break; - } -} - -void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { - LocationSummary* locations = instruction->GetLocations(); - VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); - VectorRegister left = VectorRegisterFrom(locations->InAt(1)); - VectorRegister right = VectorRegisterFrom(locations->InAt(2)); - VectorRegister tmp = static_cast<VectorRegister>(FTMP); - VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); - - DCHECK(locations->InAt(0).Equals(locations->Out())); - - // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). - HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); - HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); - DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), - HVecOperation::ToSignedType(b->GetPackedType())); - switch (a->GetPackedType()) { - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kUint16: - case DataType::Type::kInt16: { - DCHECK_EQ(8u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ AddvH(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ AddvH(acc, acc, tmp1); - break; - } - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ AddvW(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillB(tmp, ZERO); - __ Hadd_sH(tmp1, left, tmp); - __ Hadd_sH(tmp2, right, tmp); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - __ Hadd_sH(tmp1, tmp, left); - __ Hadd_sH(tmp2, tmp, right); - __ Asub_sH(tmp1, tmp1, tmp2); - __ Hadd_sW(tmp1, tmp1, tmp1); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, a->GetVectorLength()); - switch 
(instruction->GetPackedType()) { - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillH(tmp, ZERO); - __ Hadd_sW(tmp1, left, tmp); - __ Hadd_sW(tmp2, right, tmp); - __ Asub_sW(tmp1, tmp1, tmp2); - __ AddvW(acc, acc, tmp1); - __ Hadd_sW(tmp1, tmp, left); - __ Hadd_sW(tmp2, tmp, right); - __ Asub_sW(tmp1, tmp1, tmp2); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillH(tmp, ZERO); - __ Hadd_sW(tmp1, left, tmp); - __ Hadd_sW(tmp2, right, tmp); - __ Asub_sW(tmp1, tmp1, tmp2); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - __ Hadd_sW(tmp1, tmp, left); - __ Hadd_sW(tmp2, tmp, right); - __ Asub_sW(tmp1, tmp1, tmp2); - __ Hadd_sD(tmp1, tmp1, tmp1); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kInt32: - DCHECK_EQ(4u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt32: { - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ FillW(tmp, ZERO); - __ SubvW(tmp1, left, right); - __ Add_aW(tmp1, tmp1, tmp); - __ AddvW(acc, acc, tmp1); - break; - } - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); - __ FillW(tmp, ZERO); - __ Hadd_sD(tmp1, left, tmp); - __ Hadd_sD(tmp2, right, tmp); - __ Asub_sD(tmp1, tmp1, tmp2); - __ AddvD(acc, acc, tmp1); - __ Hadd_sD(tmp1, tmp, left); - __ Hadd_sD(tmp2, tmp, right); - __ Asub_sD(tmp1, tmp1, tmp2); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - case DataType::Type::kInt64: { - DCHECK_EQ(2u, a->GetVectorLength()); - switch (instruction->GetPackedType()) { - case DataType::Type::kInt64: { - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ FillD(tmp, ZERO); - __ SubvD(tmp1, left, right); - __ Add_aD(tmp1, tmp1, tmp); - __ AddvD(acc, acc, tmp1); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } - break; - } - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -// Helper to set up locations for vector memory operations. 
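The SAD-accumulate sequences above widen both operands (Hadd_s*), take the absolute difference (Asub_s*), widen again when the accumulator lanes are wider than the inputs, and add the result into the accumulator. A scalar reference for what that computes, shown for unsigned byte inputs and an int32 accumulator; a sketch only, not ART code:

#include <cstddef>
#include <cstdint>
#include <cstdlib>

int32_t SadAccumulate(int32_t acc, const uint8_t* a, const uint8_t* b, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    acc += std::abs(static_cast<int32_t>(a[i]) - static_cast<int32_t>(b[i]));
  }
  return acc;
}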
-static void CreateVecMemLocations(ArenaAllocator* allocator, - HVecMemoryOperation* instruction, - bool is_load) { - LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (is_load) { - locations->SetOut(Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -// Helper to prepare register and offset for vector memory operations. Returns the offset and sets -// the output parameter adjusted_base to the original base or to a reserved temporary register (AT). -int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, - size_t size, - /* out */ GpuRegister* adjusted_base) { - GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>(); - Location index = locations->InAt(1); - int scale = TIMES_1; - switch (size) { - case 2: scale = TIMES_2; break; - case 4: scale = TIMES_4; break; - case 8: scale = TIMES_8; break; - default: break; - } - int32_t offset = mirror::Array::DataOffset(size).Int32Value(); - - if (index.IsConstant()) { - offset += index.GetConstant()->AsIntConstant()->GetValue() << scale; - __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale); - *adjusted_base = base; - } else { - GpuRegister index_reg = index.AsRegister<GpuRegister>(); - if (scale != TIMES_1) { - __ Dlsa(AT, index_reg, base, scale); - } else { - __ Daddu(AT, base, index_reg); - } - *adjusted_base = AT; - } - return offset; -} - -void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); -} - -void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t size = DataType::Size(instruction->GetPackedType()); - VectorRegister reg = VectorRegisterFrom(locations->Out()); - GpuRegister base; - int32_t offset = VecAddress(locations, size, &base); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ LdB(reg, base, offset); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - // Loading 8-bytes (needed if dealing with compressed strings in StringCharAt) from unaligned - // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned - // loads and stores. - // TODO: Implement support for StringCharAt. 
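For reference, the VecAddress helper above forms the element address as array base + mirror::Array::DataOffset(size) + (index << scale); with a constant index the whole expression folds into the immediate offset, while a register index is first combined with the base into AT via Dlsa/Daddu. A sketch of that arithmetic, not ART code:

#include <cstddef>
#include <cstdint>

inline uintptr_t VecElementAddress(uintptr_t array_base,
                                   size_t data_offset,    // mirror::Array::DataOffset(size)
                                   size_t index,
                                   size_t element_size) {
  return array_base + data_offset + index * element_size;
}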
- DCHECK(!instruction->IsStringCharAt()); - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ LdH(reg, base, offset); - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ LdW(reg, base, offset); - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ LdD(reg, base, offset); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); -} - -void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t size = DataType::Size(instruction->GetPackedType()); - VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); - GpuRegister base; - int32_t offset = VecAddress(locations, size, &base); - switch (instruction->GetPackedType()) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - DCHECK_EQ(16u, instruction->GetVectorLength()); - __ StB(reg, base, offset); - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - DCHECK_EQ(8u, instruction->GetVectorLength()); - __ StH(reg, base, offset); - break; - case DataType::Type::kInt32: - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instruction->GetVectorLength()); - __ StW(reg, base, offset); - break; - case DataType::Type::kInt64: - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instruction->GetVectorLength()); - __ StD(reg, base, offset); - break; - default: - LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); - UNREACHABLE(); - } -} - -#undef __ - -} // namespace mips64 -} // namespace art diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 0ee00356b9..1390af2435 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -63,9 +63,10 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i LocationSummary* locations = instruction->GetLocations(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); // Shorthand for any type of zero. if (IsZeroBitPattern(instruction->InputAt(0))) { - __ xorps(dst, dst); + cpu_has_avx ? 
__ vxorps(dst, dst, dst) : __ xorps(dst, dst); return; } @@ -431,41 +432,69 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati } } +static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ paddb(dst, src); + cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src); break; case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ paddw(dst, src); + cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ paddd(dst, src); + cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ paddq(dst, src); + cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ addps(dst, src); + cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ addpd(dst, src); + cpu_has_avx ? 
__ vaddpd(dst, other_src, src) : __ addpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -533,40 +562,46 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction } void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ psubb(dst, src); + cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src); break; case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ psubw(dst, src); + cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ psubd(dst, src); + cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ psubq(dst, src); + cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ subps(dst, src); + cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ subpd(dst, src); + cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -607,31 +642,37 @@ void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instr } void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ pmullw(dst, src); + cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ pmulld(dst, src); + cpu_has_avx ? 
__ vpmulld(dst, other_src, src) : __ pmulld(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ mulps(dst, src); + cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ mulpd(dst, src); + cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -640,22 +681,28 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ divps(dst, src); + cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ divpd(dst, src); + cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -762,14 +809,20 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -780,15 +833,15 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pand(dst, src); + cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ andps(dst, src); + cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ andpd(dst, src); + cpu_has_avx ? 
__ vandpd(dst, other_src, src) : __ andpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -797,14 +850,20 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -815,15 +874,15 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pandn(dst, src); + cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ andnps(dst, src); + cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ andnpd(dst, src); + cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -832,14 +891,20 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -850,15 +915,15 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ por(dst, src); + cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ orps(dst, src); + cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ orpd(dst, src); + cpu_has_avx ? 
__ vorpd(dst, other_src, src) : __ orpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -867,14 +932,20 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -885,15 +956,15 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pxor(dst, src); + cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ xorps(dst, src); + cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ xorpd(dst, src); + cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1046,7 +1117,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented // Zero out all other elements first. - __ xorps(dst, dst); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); + cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst); // Shorthand for any type of zero. 
if (IsZeroBitPattern(instruction->InputAt(0))) { @@ -1129,11 +1201,38 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr } void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); } void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); + LocationSummary* locations = instruction->GetLocations(); + XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + if (!cpu_has_avx) { + __ movaps(tmp, right); + __ pmaddwd(tmp, left); + __ paddd(acc, tmp); + } else { + __ vpmaddwd(tmp, left, right); + __ vpaddd(acc, acc, tmp); + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 9c2882766c..7fac44dea8 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -58,9 +58,10 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar LocationSummary* locations = instruction->GetLocations(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); // Shorthand for any type of zero. if (IsZeroBitPattern(instruction->InputAt(0))) { - __ xorps(dst, dst); + cpu_has_avx ?
__ vxorps(dst, dst, dst) : __ xorps(dst, dst); return; } @@ -414,41 +415,69 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati } } +static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ paddb(dst, src); + cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src); break; case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ paddw(dst, src); + cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ paddd(dst, src); + cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ paddq(dst, src); + cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ addps(dst, src); + cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ addpd(dst, src); + cpu_has_avx ? 
__ vaddpd(dst, other_src, src) : __ addpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -516,40 +545,46 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct } void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ psubb(dst, src); + cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src); break; case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ psubw(dst, src); + cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ psubd(dst, src); + cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ psubq(dst, src); + cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ subps(dst, src); + cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ subpd(dst, src); + cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -590,31 +625,37 @@ void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* in } void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kUint16: case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ pmullw(dst, src); + cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src); break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ pmulld(dst, src); + cpu_has_avx ? 
__ vpmulld(dst, other_src, src): __ pmulld(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ mulps(dst, src); + cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ mulpd(dst, src); + cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -623,22 +664,28 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ divps(dst, src); + cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ divpd(dst, src); + cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -745,14 +792,20 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -763,15 +816,15 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pand(dst, src); + cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ andps(dst, src); + cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ andpd(dst, src); + cpu_has_avx ? 
__ vandpd(dst, other_src, src) : __ andpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -780,14 +833,20 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -798,15 +857,15 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pandn(dst, src); + cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ andnps(dst, src); + cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ andnpd(dst, src); + cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -815,14 +874,20 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -833,15 +898,15 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ por(dst, src); + cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ orps(dst, src); + cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ orpd(dst, src); + cpu_has_avx ? 
__ vorpd(dst, other_src, src) : __ orpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -850,14 +915,20 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + if (CpuHasAvxFeatureFlag()) { + CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction); + } else { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); + } } void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { + bool cpu_has_avx = CpuHasAvxFeatureFlag(); LocationSummary* locations = instruction->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + DCHECK(cpu_has_avx || other_src == dst); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -868,15 +939,15 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ pxor(dst, src); + cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ xorps(dst, src); + cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ xorpd(dst, src); + cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1024,7 +1095,8 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented // Zero out all other elements first. - __ xorps(dst, dst); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); + cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst); // Shorthand for any type of zero. 
if (IsZeroBitPattern(instruction->InputAt(0))) { @@ -1102,11 +1174,38 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in } void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); } void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + bool cpu_has_avx = CpuHasAvxFeatureFlag(); + LocationSummary* locations = instruction->GetLocations(); + XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + if (!cpu_has_avx) { + __ movaps(tmp, right); + __ pmaddwd(tmp, left); + __ paddd(acc, tmp); + } else { + __ vpmaddwd(tmp, left, right); + __ vpaddd(acc, acc, tmp); + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType(); + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 95118b0b6d..ed1a536239 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,7 +16,7 @@ #include "code_generator_x86.h" -#include "art_method.h" +#include "art_method-inl.h" #include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" @@ -27,10 +27,12 @@ #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86.h" +#include "jit/profiling_info.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -987,7 +989,7 @@ size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t } else { __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); } - return GetFloatingPointSpillSlotSize(); + return GetSlowPathFPWidth(); } size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { @@ -996,7 +998,7 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 } else { __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); } - return GetFloatingPointSpillSlotSize(); + return GetSlowPathFPWidth(); } void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -1045,7 +1047,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + 
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -1070,6 +1072,76 @@ static dwarf::Reg DWARFReg(Register reg) { return dwarf::Reg::X86Core(static_cast<int>(reg)); } +void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + Register reg = EAX; + if (is_frame_entry) { + reg = kMethodRegisterArgument; + } else { + __ pushl(EAX); + __ movl(EAX, Address(ESP, kX86WordSize)); + } + NearLabel overflow; + __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(ArtMethod::MaxCounter())); + __ j(kEqual, &overflow); + __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + __ Bind(&overflow); + if (!is_frame_entry) { + __ popl(EAX); + } + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + uint32_t address = reinterpret_cast32<uint32_t>(info); + NearLabel done; + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // Alignment + __ subl(ESP, Immediate(8)); + __ cfi().AdjustCFAOffset(8); + // We need a temporary. The stub also expects the method at bottom of stack. + __ pushl(EAX); + __ cfi().AdjustCFAOffset(4); + __ movl(EAX, Immediate(address)); + __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), + Immediate(1)); + __ j(kCarryClear, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value()); + __ Bind(&done); + // We don't strictly require to restore EAX, but this makes the generated + // code easier to reason about. + __ popl(EAX); + __ cfi().AdjustCFAOffset(-4); + __ addl(ESP, Immediate(8)); + __ cfi().AdjustCFAOffset(-8); + } else { + if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + } + // We need a temporary. + __ pushl(EAX); + __ cfi().AdjustCFAOffset(4); + __ movl(EAX, Immediate(address)); + __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), + Immediate(1)); + __ popl(EAX); // Put stack as expected before exiting or calling stub. 
+ __ cfi().AdjustCFAOffset(-4); + __ j(kCarryClear, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value()); + __ Bind(&done); + } + } + } +} + void CodeGeneratorX86::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address __ Bind(&frame_entry_label_); @@ -1077,44 +1149,39 @@ void CodeGeneratorX86::GenerateFrameEntry() { IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - if (GetCompilerOptions().CountHotnessInCompiledCode()) { - __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(1)); - } - if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); RecordPcInfo(nullptr, 0); } - if (HasEmptyFrame()) { - return; - } + if (!HasEmptyFrame()) { + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); + } + } - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - Register reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ pushl(reg); - __ cfi().AdjustCFAOffset(kX86WordSize); - __ cfi().RelOffset(DWARFReg(reg), 0); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); } - } - int adjust = GetFrameSize() - FrameEntrySpillSize(); - __ subl(ESP, Immediate(adjust)); - __ cfi().AdjustCFAOffset(adjust); - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. 
- __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); - } + MaybeIncrementHotness(/* is_frame_entry= */ true); } void CodeGeneratorX86::GenerateFrameExit() { @@ -1382,12 +1449,7 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ pushl(EAX); - __ movl(EAX, Address(ESP, kX86WordSize)); - __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); - __ popl(EAX); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2154,31 +2216,46 @@ void LocationsBuilderX86::VisitReturn(HReturn* ret) { } void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { - if (kIsDebugBuild) { - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX); - break; + switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX); + break; - case DataType::Type::kInt64: - DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX); - DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX); - break; + case DataType::Type::kInt64: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX); + break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); - break; + case DataType::Type::kFloat32: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put the return value in both + // floating point and core registers. + __ movd(EAX, XMM0); + } + break; - default: - LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); - } + case DataType::Type::kFloat64: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put the return value in both + // floating point and core registers. + __ movd(EAX, XMM0); + // Use XMM1 as temporary register to not clobber XMM0. + __ movaps(XMM1, XMM0); + __ psrlq(XMM1, Immediate(32)); + __ movd(EDX, XMM1); + } + break; + + default: + LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); } codegen_->GenerateFrameExit(); } @@ -2247,6 +2324,11 @@ void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } HandleInvoke(invoke); + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + // Add one temporary for inline cache update. 
+ invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP)); + } } void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { @@ -2270,6 +2352,41 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { HandleInvoke(invoke); // Add the hidden argument. invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + // Add one temporary for inline cache update. + invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP)); + } +} + +void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) { + DCHECK_EQ(EAX, klass); + // We know the destination of an intrinsic, so no need to record inline + // caches (also the intrinsic location builder doesn't request an additional + // temporary). + if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint32_t address = reinterpret_cast32<uint32_t>(cache); + if (kIsDebugBuild) { + uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u; + CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>()); + } + Register temp = EBP; + NearLabel done; + __ movl(temp, Immediate(address)); + // Fast path for a monomorphic cache. + __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value())); + __ j(kEqual, &done); + GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value()); + __ Bind(&done); + } + } } void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -2303,6 +2420,9 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); + + codegen_->MaybeGenerateInlineCacheCheck(invoke, temp); + // temp = temp->GetAddressOfIMT() __ movl(temp, Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); @@ -4853,7 +4973,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { - DCHECK(GetCompilerOptions().IsBootImage()); + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); @@ -4874,6 +4994,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); RecordMethodBssEntryPatch(invoke); + // No need for memory fence, thanks to the x86 memory model. break; } case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: @@ -4925,6 +5046,9 @@ void CodeGeneratorX86::GenerateVirtualCall( // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). 
__ MaybeUnpoisonHeapReference(temp); + + MaybeGenerateInlineCacheCheck(invoke, temp); + // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -4935,16 +5059,16 @@ void CodeGeneratorX86::GenerateVirtualCall( void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, uint32_t intrinsic_data) { - boot_image_intrinsic_patches_.emplace_back( + boot_image_other_patches_.emplace_back( method_address, /* target_dex_file= */ nullptr, intrinsic_data); - __ Bind(&boot_image_intrinsic_patches_.back().label); + __ Bind(&boot_image_other_patches_.back().label); } void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, uint32_t boot_image_offset) { - boot_image_method_patches_.emplace_back( + boot_image_other_patches_.emplace_back( method_address, /* target_dex_file= */ nullptr, boot_image_offset); - __ Bind(&boot_image_method_patches_.back().label); + __ Bind(&boot_image_other_patches_.back().label); } void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -5089,23 +5213,26 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + - boot_image_intrinsic_patches_.size(); + boot_image_other_patches_.size(); linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -5510,6 +5637,15 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instr HandleFieldGet(instruction, instruction->GetFieldInfo()); } +void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX)); +} + +void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ movl(EAX, Immediate(instruction->GetFormat()->GetValue())); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet( HUnresolvedInstanceFieldGet* 
instruction) { FieldAccessCallingConventionX86 calling_convention; @@ -5781,13 +5917,11 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); bool is_byte_type = DataType::Size(value_type) == 1u; // We need the inputs to be different than the output in case of long operation. @@ -5818,10 +5952,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value = locations->InAt(2); DataType::Type value_type = instruction->GetComponentType(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -5864,30 +5995,30 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call_for_type_check); + DCHECK(!needs_type_check); break; } DCHECK(needs_write_barrier); Register register_value = value.AsRegister<Register>(); - // We cannot use a NearLabel for `done`, as its range may be too - // short when Baker read barriers are enabled. - Label done; - NearLabel not_null, do_put; - SlowPathCode* slow_path = nullptr; Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); - if (may_need_runtime_call_for_type_check) { + + bool can_value_be_null = instruction->GetValueCanBeNull(); + NearLabel do_store; + if (can_value_be_null) { + __ testl(register_value, register_value); + __ j(kEqual, &do_store); + } + + SlowPathCode* slow_path = nullptr; + if (needs_type_check) { slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - __ testl(register_value, register_value); - __ j(kNotEqual, ¬_null); - __ movl(address, Immediate(0)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ jmp(&done); - __ Bind(¬_null); - } + + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); // Note that when Baker read barriers are enabled, the type // checks are performed without read barriers. This is fine, @@ -5910,6 +6041,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ cmpl(temp, Address(register_value, class_offset)); if (instruction->StaticTypeOfArrayIsObjectArray()) { + NearLabel do_put; __ j(kEqual, &do_put); // If heap poisoning is enabled, the `temp` reference has // not been unpoisoned yet; unpoison it now. 
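The VisitArraySet hunks above (continued in the next hunk) replace the old not_null/done label dance with a single do_store fast path: when the stored reference may be null, it is tested up front and branches straight to the store, so the type check, its slow path, and the GC card mark only run for non-null values. A minimal sketch of that control flow in plain C++, with hypothetical names (Object, TypeCheckFastPath, StoreReference) standing in for ART's assembler, LocationSummary, and slow-path machinery; this is a model of the emitted ordering, not ART code:

// Toy model (hypothetical names, not ART code) of the control flow the
// rewritten VisitArraySet emits for reference stores: a possibly-null value
// is tested first and jumps straight to the store, skipping the type check,
// its slow path, and the GC card mark.
#include <cstdio>

struct Object { const char* klass; };

// Stands in for the read-barrier-free cmpl against the array's component
// type; the real code calls an ArraySetSlowPath on a mismatch.
static bool TypeCheckFastPath(const Object& value, const char* component_type) {
  return value.klass == component_type;
}

static void StoreReference(Object** slot, Object* value, const char* component_type,
                           bool can_value_be_null, bool needs_type_check) {
  if (can_value_be_null && value == nullptr) {
    *slot = value;  // mirrors `j(kEqual, &do_store)` followed by the plain movl
    std::printf("stored null: no type check, no card mark\n");
    return;
  }
  if (needs_type_check && !TypeCheckFastPath(*value, component_type)) {
    std::printf("slow path: runtime checked store\n");  // ArraySetSlowPath
    return;
  }
  std::printf("marked GC card\n");  // MarkGCCard(..., /* value_can_be_null= */ false)
  *slot = value;                    // poisoned first if kPoisonHeapReferences
  std::printf("stored %s\n", value->klass);
}

int main() {
  const char* kString = "java.lang.String";
  Object s{kString};
  Object* slot = nullptr;
  StoreReference(&slot, &s, kString, /* can_value_be_null= */ true, /* needs_type_check= */ true);
  StoreReference(&slot, nullptr, kString, /* can_value_be_null= */ true, /* needs_type_check= */ true);
  return 0;
}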
@@ -5926,22 +6058,28 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } } + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard( + temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false); + + if (can_value_be_null) { + DCHECK(do_store.IsLinked()); + __ Bind(&do_store); + } + + Register source = register_value; if (kPoisonHeapReferences) { __ movl(temp, register_value); __ PoisonHeapReference(temp); - __ movl(address, temp); - } else { - __ movl(address, register_value); + source = temp; } - if (!may_need_runtime_call_for_type_check) { + + __ movl(address, source); + + if (can_value_be_null || !needs_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull()); - __ Bind(&done); - if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -6586,7 +6724,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); @@ -6606,6 +6745,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE Address address(method_address, CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); + // No need for memory fence, thanks to the x86 memory model. generate_null_check = true; break; } @@ -6693,13 +6833,12 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); const size_t status_byte_offset = mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - // No need for memory fence, thanks to the X86 memory model. 
} void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, @@ -6783,7 +6922,8 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); codegen_->RecordBootImageStringPatch(load); @@ -6803,6 +6943,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -8236,6 +8377,7 @@ class JumpTableRIPFixup : public RIPFixup { void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. X86Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 // byte values. @@ -8407,6 +8549,19 @@ void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* LOG(FATAL) << "Unreachable"; } +bool LocationsBuilderX86::CpuHasAvxFeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX(); +} +bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); +} +bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX(); +} +bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); +} + #undef __ } // namespace x86 diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index deeef888e2..16446ce561 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -175,6 +175,8 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleShift(HBinaryOperation* instruction); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + bool CpuHasAvxFeatureFlag(); + bool CpuHasAvx2FeatureFlag(); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitorX86 parameter_visitor_; @@ -307,6 +309,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { HBasicBlock* default_block); void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); + bool CpuHasAvxFeatureFlag(); + bool CpuHasAvx2FeatureFlag(); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -353,12 +357,16 @@ class CodeGeneratorX86 : public CodeGenerator { return kX86WordSize; } - size_t GetFloatingPointSpillSlotSize() const override { + size_t GetSlowPathFPWidth() const override { return GetGraph()->HasSIMD() ? 
4 * kX86WordSize // 16 bytes == 4 words for each spill : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } + size_t GetCalleePreservedFPWidth() const override { + return 2 * kX86WordSize; + } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } @@ -616,6 +624,9 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) override; void GenerateExplicitNullCheck(HNullCheck* instruction) override; + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass); + void MaybeIncrementHotness(bool is_frame_entry); + // When we don't know the proper offset for the value, we use kDummy32BitOffset. // The correct value will be inserted when processing Assembler fixups. static constexpr int32_t kDummy32BitOffset = 256; @@ -644,8 +655,7 @@ class CodeGeneratorX86 : public CodeGenerator { ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). + // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; @@ -657,8 +667,9 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<X86PcRelativePatchInfo> boot_image_other_patches_; // Patches for string root accesses in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7c293b8605..8518b6db19 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,7 +16,7 @@ #include "code_generator_x86_64.h" -#include "art_method.h" +#include "art_method-inl.h" #include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" @@ -26,11 +26,13 @@ #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" +#include "jit/profiling_info.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/object_reference.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -999,7 +1001,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: - DCHECK(GetCompilerOptions().IsBootImage()); + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); __ leal(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordBootImageMethodPatch(invoke); @@ -1015,6 +1017,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordMethodBssEntryPatch(invoke); + // No need for memory fence, thanks to the x86-64 memory model. break; } case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: @@ -1067,6 +1070,9 @@ void CodeGeneratorX86_64::GenerateVirtualCall( // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). 
__ MaybeUnpoisonHeapReference(temp); + + MaybeGenerateInlineCacheCheck(invoke, temp); + // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -1076,13 +1082,13 @@ void CodeGeneratorX86_64::GenerateVirtualCall( } void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) { - boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data); - __ Bind(&boot_image_intrinsic_patches_.back().label); + boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data); + __ Bind(&boot_image_other_patches_.back().label); } void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { - boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset); - __ Bind(&boot_image_method_patches_.back().label); + boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset); + __ Bind(&boot_image_other_patches_.back().label); } void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -1190,23 +1196,26 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + - boot_image_intrinsic_patches_.size(); + boot_image_other_patches_.size(); linker_patches->reserve(size); - if (GetCompilerOptions().IsBootImage()) { + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( - boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( - boot_image_method_patches_, linker_patches); + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); - DCHECK(boot_image_intrinsic_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1245,7 +1254,7 @@ size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32 } else { __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); } - return GetFloatingPointSpillSlotSize(); + return GetSlowPathFPWidth(); } size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { @@ -1254,7 +1263,7 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin } else { __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); } - return GetFloatingPointSpillSlotSize(); + return GetSlowPathFPWidth(); } void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -1308,7 +1317,7 @@ 
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { @@ -1337,6 +1346,55 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } +void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + NearLabel overflow; + Register method = kMethodRegisterArgument; + if (!is_frame_entry) { + CHECK(RequiresCurrentMethod()); + method = TMP; + __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + } + __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(ArtMethod::MaxCounter())); + __ j(kEqual, &overflow); + __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + __ Bind(&overflow); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + uint64_t address = reinterpret_cast64<uint64_t>(info); + NearLabel done; + __ movq(CpuRegister(TMP), Immediate(address)); + __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), + Immediate(1)); + __ j(kCarryClear, &done); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // Frame alignment, and the stub expects the method on the stack. 
+ __ pushq(CpuRegister(RDI)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(RDI), 0); + } else if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); + } + GenerateInvokeRuntime( + GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value()); + if (HasEmptyFrame()) { + __ popq(CpuRegister(RDI)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(RDI)); + } + __ Bind(&done); + } + } +} + void CodeGeneratorX86_64::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address __ Bind(&frame_entry_label_); @@ -1344,11 +1402,6 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - if (GetCompilerOptions().CountHotnessInCompiledCode()) { - __ addw(Address(CpuRegister(kMethodRegisterArgument), - ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(1)); - } if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); @@ -1356,52 +1409,54 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - if (HasEmptyFrame()) { - return; - } - - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - Register reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ pushq(CpuRegister(reg)); - __ cfi().AdjustCFAOffset(kX86_64WordSize); - __ cfi().RelOffset(DWARFReg(reg), 0); + if (!HasEmptyFrame()) { + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); + } } - } - int adjust = GetFrameSize() - GetCoreSpillSize(); - __ subq(CpuRegister(RSP), Immediate(adjust)); - __ cfi().AdjustCFAOffset(adjust); - uint32_t xmm_spill_location = GetFpuSpillStart(); - size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); + uint32_t xmm_spill_location = GetFpuSpillStart(); + size_t xmm_spill_slot_size = GetCalleePreservedFPWidth(); + + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); + } + } - for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { - if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - int offset = xmm_spill_location + (xmm_spill_slot_size * i); - __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); - __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + CHECK(!HasEmptyFrame()); + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), + CpuRegister(kMethodRegisterArgument)); } - } - // Save the current method if we need it. 
Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), - CpuRegister(kMethodRegisterArgument)); + if (GetGraph()->HasShouldDeoptimizeFlag()) { + CHECK(!HasEmptyFrame()); + // Initialize should_deoptimize flag to 0. + __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. - __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); - } + MaybeIncrementHotness(/* is_frame_entry= */ true); } void CodeGeneratorX86_64::GenerateFrameExit() { __ cfi().RememberState(); if (!HasEmptyFrame()) { uint32_t xmm_spill_location = GetFpuSpillStart(); - size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); + size_t xmm_spill_slot_size = GetCalleePreservedFPWidth(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { int offset = xmm_spill_location + (xmm_spill_slot_size * i); @@ -1541,11 +1596,7 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); - __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(1)); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2315,28 +2366,41 @@ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { } void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { - if (kIsDebugBuild) { - switch (ret->InputAt(0)->GetType()) { - case DataType::Type::kReference: - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kUint16: - case DataType::Type::kInt16: - case DataType::Type::kInt32: - case DataType::Type::kInt64: - DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); - break; - - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), - XMM0); - break; + switch (ret->InputAt(0)->GetType()) { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); + break; - default: - LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); + case DataType::Type::kFloat32: { + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), + XMM0); + // To simplify callers of an OSR method, we put the return value in both + // floating point and core register. 
+ if (GetGraph()->IsCompilingOsr()) { + __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false); + } + break; + } + case DataType::Type::kFloat64: { + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), + XMM0); + // To simplify callers of an OSR method, we put the return value in both + // floating point and core register. + if (GetGraph()->IsCompilingOsr()) { + __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true); + } + break; + } + + default: + LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); } codegen_->GenerateFrameExit(); } @@ -2505,6 +2569,31 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX)); } +void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + CpuRegister klass) { + DCHECK_EQ(RDI, klass.AsRegister()); + // We know the destination of an intrinsic, so no need to record inline + // caches. + if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + if (info != nullptr) { + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint64_t address = reinterpret_cast64<uint64_t>(cache); + NearLabel done; + __ movq(CpuRegister(TMP), Immediate(address)); + // Fast path for a monomorphic cache. + __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass); + __ j(kEqual, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value()); + __ Bind(&done); + } + } +} + void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. LocationSummary* locations = invoke->GetLocations(); @@ -2513,11 +2602,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // Set the hidden argument. This is safe to do this here, as RAX - // won't be modified thereafter, before the `call` instruction. - DCHECK_EQ(RAX, hidden_reg.AsRegister()); - codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); - if (receiver.IsStackSlot()) { __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); // /* HeapReference<Class> */ temp = temp->klass_ @@ -2535,6 +2619,15 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); + + codegen_->MaybeGenerateInlineCacheCheck(invoke, temp); + + // Set the hidden argument. This is safe to do this here, as RAX + // won't be modified thereafter, before the `call` instruction. + // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
+ DCHECK_EQ(RAX, hidden_reg.AsRegister()); + codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); + // temp = temp->GetAddressOfIMT() __ movq(temp, Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); @@ -2891,7 +2984,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ movl(output, Immediate(kPrimIntMax)); // if input >= (float)INT_MAX goto done - __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax)); + __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2952,7 +3045,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver codegen_->Load64BitValue(output, kPrimLongMax); // if input >= (float)LONG_MAX goto done - __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax)); + __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2973,7 +3066,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver codegen_->Load64BitValue(output, kPrimLongMax); // if input >= (double)LONG_MAX goto done - __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax)); + __ comisd(input, codegen_->LiteralDoubleAddress( + static_cast<double>(kPrimLongMax))); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -4882,6 +4976,15 @@ void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instru HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX)); +} + +void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue())); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet( HUnresolvedInstanceFieldGet* instruction) { FieldAccessCallingConventionX86_64 calling_convention; @@ -5143,13 +5246,11 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, - may_need_runtime_call_for_type_check ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); @@ -5173,12 +5274,9 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value = locations->InAt(2); DataType::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); switch (value_type) { case DataType::Type::kBool: @@ -5220,30 +5318,30 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call_for_type_check); + DCHECK(!needs_type_check); break; } DCHECK(needs_write_barrier); CpuRegister register_value = value.AsRegister<CpuRegister>(); - // We cannot use a NearLabel for `done`, as its range may be too - // short when Baker read barriers are enabled. - Label done; - NearLabel not_null, do_put; - SlowPathCode* slow_path = nullptr; Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - if (may_need_runtime_call_for_type_check) { + + bool can_value_be_null = instruction->GetValueCanBeNull(); + NearLabel do_store; + if (can_value_be_null) { + __ testl(register_value, register_value); + __ j(kEqual, &do_store); + } + + SlowPathCode* slow_path = nullptr; + if (needs_type_check) { slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); - if (instruction->GetValueCanBeNull()) { - __ testl(register_value, register_value); - __ j(kNotEqual, ¬_null); - __ movl(address, Immediate(0)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ jmp(&done); - __ Bind(¬_null); - } + + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); // Note that when Baker read barriers are enabled, the type // checks are performed without read barriers. This is fine, @@ -5266,6 +5364,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ cmpl(temp, Address(register_value, class_offset)); if (instruction->StaticTypeOfArrayIsObjectArray()) { + NearLabel do_put; __ j(kEqual, &do_put); // If heap poisoning is enabled, the `temp` reference has // not been unpoisoned yet; unpoison it now. 
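The rewritten VisitArraySet, continued in the next hunk, reorders the reference store into: null check, type check, GC card mark, store. A condensed standalone model of that control flow is sketched below; Object, ComponentTypeAccepts, CallArraySetSlowPath and MarkGCCard are stand-ins, not ART helpers.

#include <cstdio>

struct Object {};

// Illustrative stubs for the pieces the real code delegates to.
bool ComponentTypeAccepts(Object* /*array*/, Object* /*value*/) { return true; }
void CallArraySetSlowPath() { std::puts("ArraySet slow path"); }
void MarkGCCard(Object* /*array*/) { std::puts("card marked"); }

void StoreReference(Object** slot,
                    Object* array,
                    Object* value,
                    bool needs_type_check,
                    bool value_can_be_null) {
  if (value_can_be_null && value == nullptr) {
    *slot = nullptr;  // null stores skip both the type check and the card mark
    return;
  }
  if (needs_type_check && !ComponentTypeAccepts(array, value)) {
    CallArraySetSlowPath();  // hard cases are handled out of line
    return;
  }
  // The value is known non-null at this point, so the card can be marked unconditionally.
  MarkGCCard(array);
  *slot = value;
}

Because null values branch straight to the store, the real code can pass value_can_be_null = false to MarkGCCard and drop the extra null test that used to guard the card mark.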
@@ -5282,22 +5381,28 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } } + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard( + temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false); + + if (can_value_be_null) { + DCHECK(do_store.IsLinked()); + __ Bind(&do_store); + } + + Location source = value; if (kPoisonHeapReferences) { __ movl(temp, register_value); __ PoisonHeapReference(temp); - __ movl(address, temp); - } else { - __ movl(address, register_value); + source = temp_loc; } - if (!may_need_runtime_call_for_type_check) { + + __ movl(address, source.AsRegister<CpuRegister>()); + + if (can_value_be_null || !needs_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull()); - __ Bind(&done); - if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); } @@ -5822,13 +5927,12 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); const size_t status_byte_offset = mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_initialized_value = - enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - // No need for memory fence, thanks to the x86-64 memory model. } void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, @@ -5952,7 +6056,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageTypePatch(cls); @@ -5969,6 +6074,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); + // No need for memory fence, thanks to the x86-64 memory model. 
generate_null_check = true; break; } @@ -6105,7 +6211,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageStringPatch(load); return; @@ -6122,6 +6229,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + // No need for memory fence, thanks to the x86-64 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -7650,6 +7758,22 @@ void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots } } +bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX(); +} + +bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); +} + +bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX(); +} + +bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f74e130702..2e8d9b3315 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -177,6 +177,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction); + bool CpuHasAvxFeatureFlag(); + bool CpuHasAvx2FeatureFlag(); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; @@ -287,6 +289,9 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void HandleGoto(HInstruction* got, HBasicBlock* successor); + bool CpuHasAvxFeatureFlag(); + bool CpuHasAvx2FeatureFlag(); + X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -333,12 +338,16 @@ class CodeGeneratorX86_64 : public CodeGenerator { return kX86_64WordSize; } - size_t GetFloatingPointSpillSlotSize() const override { + size_t GetSlowPathFPWidth() const override { return GetGraph()->HasSIMD() ? 
2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill } + size_t GetCalleePreservedFPWidth() const override { + return 1 * kX86_64WordSize; + } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } @@ -591,6 +600,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateNop() override; void GenerateImplicitNullCheck(HNullCheck* instruction) override; void GenerateExplicitNullCheck(HNullCheck* instruction) override; + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls); + + + void MaybeIncrementHotness(bool is_frame_entry); // When we don't know the proper offset for the value, we use kDummy32BitOffset. // We will fix this up in the linker later to have the right value. @@ -613,8 +626,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. - // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). + // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; @@ -626,8 +638,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; - // PC-relative patch info for IntrinsicObjects. - ArenaDeque<PatchInfo<Label>> boot_image_intrinsic_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PatchInfo<Label>> boot_image_other_patches_; // Patches for string literals in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index b5a7c137f6..d9b4f79e8b 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -29,8 +29,6 @@ #include "register_allocator_linear_scan.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" -#include "utils/mips/managed_register_mips.h" -#include "utils/mips64/managed_register_mips64.h" #include "utils/x86/managed_register_x86.h" #include "gtest/gtest.h" @@ -55,12 +53,6 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #ifdef ART_ENABLE_CODEGEN_x86_64 CodegenTargetConfig(InstructionSet::kX86_64, create_codegen_x86_64), #endif -#ifdef ART_ENABLE_CODEGEN_mips - CodegenTargetConfig(InstructionSet::kMips, create_codegen_mips), -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - CodegenTargetConfig(InstructionSet::kMips64, create_codegen_mips64) -#endif }; for (const CodegenTargetConfig& test_config : test_config_candidates) { @@ -834,6 +826,7 @@ TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA75) { EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); EXPECT_TRUE(features->Has(vixl::CPUFeatures::kDotProduct)); EXPECT_TRUE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kNEONHalf)); EXPECT_TRUE(features->Has(vixl::CPUFeatures::kAtomics)); } @@ -847,70 +840,53 @@ TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA53) { EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); EXPECT_FALSE(features->Has(vixl::CPUFeatures::kDotProduct)); EXPECT_FALSE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kNEONHalf)); EXPECT_FALSE(features->Has(vixl::CPUFeatures::kAtomics)); } -#endif - -#ifdef ART_ENABLE_CODEGEN_mips -TEST_F(CodegenTest, MipsClobberRA) { - OverrideInstructionSetFeatures(InstructionSet::kMips, "mips32r"); - CHECK(!instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6()); - if (!CanExecute(InstructionSet::kMips)) { - // HMipsComputeBaseMethodAddress and the NAL instruction behind it - // should only be generated on non-R6. - return; - } +constexpr static size_t kExpectedFPSpillSize = 8 * vixl::aarch64::kDRegSizeInBytes; +// The following two tests check that for both SIMD and non-SIMD graphs exactly 64 bits are +// allocated on the stack per callee-saved FP register to be preserved in the frame entry, as +// the ABI requires.
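As a quick sanity check of the constant above, here is the arithmetic as standalone C++ (not using VIXL; the register count and width are the AAPCS64 values for d8..d15):

#include <cstddef>

int main() {
  // AAPCS64: d8..d15 are callee-saved, and only their low 64 bits (8 bytes)
  // must be preserved across calls, even when SIMD code uses the full Q registers.
  constexpr size_t kCalleeSavedFpRegisters = 8;
  constexpr size_t kBytesPreservedPerRegister = 8;
  constexpr size_t kExpectedFPSpillSize =
      kCalleeSavedFpRegisters * kBytesPreservedPerRegister;
  static_assert(kExpectedFPSpillSize == 64, "8 registers x 8 bytes = 64 bytes");
  return 0;
}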
+TEST_F(CodegenTest, ARM64FrameSizeSIMD) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); - HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (GetAllocator()) HGoto()); + codegen.Initialize(); + graph->SetHasSIMD(true); - HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); - graph->AddBlock(block); + DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); + vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; + while (!reg_list.IsEmpty()) { + uint32_t reg_code = reg_list.PopLowestIndex().GetCode(); + codegen.AddAllocatedRegister(Location::FpuRegisterLocation(reg_code)); + } + codegen.ComputeSpillMask(); - HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); - graph->AddBlock(exit_block); - graph->SetExitBlock(exit_block); - exit_block->AddInstruction(new (GetAllocator()) HExit()); + EXPECT_EQ(codegen.GetFpuSpillSize(), kExpectedFPSpillSize); +} - entry_block->AddSuccessor(block); - block->AddSuccessor(exit_block); +TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); + HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); - // To simplify matters, don't create PC-relative HLoadClass or HLoadString. - // Instead, generate HMipsComputeBaseMethodAddress directly. - HMipsComputeBaseMethodAddress* base = new (GetAllocator()) HMipsComputeBaseMethodAddress(); - block->AddInstruction(base); - // HMipsComputeBaseMethodAddress is defined as int, so just make the - // compiled method return it. - block->AddInstruction(new (GetAllocator()) HReturn(base)); + codegen.Initialize(); + graph->SetHasSIMD(false); - graph->BuildDominatorTree(); + DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); + vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; + while (!reg_list.IsEmpty()) { + uint32_t reg_code = reg_list.PopLowestIndex().GetCode(); + codegen.AddAllocatedRegister(Location::FpuRegisterLocation(reg_code)); + } + codegen.ComputeSpillMask(); - mips::CodeGeneratorMIPS codegenMIPS(graph, *compiler_options_); - // Since there isn't HLoadClass or HLoadString, we need to manually indicate - // that RA is clobbered and the method entry code should generate a stack frame - // and preserve RA in it. And this is what we're testing here. - codegenMIPS.ClobberRA(); - // Without ClobberRA() the code would be: - // nal # Sets RA to point to the jr instruction below - // move v0, ra # and the CPU falls into an infinite loop. - // jr ra - // nop - // The expected code is: - // addiu sp, sp, -16 - // sw ra, 12(sp) - // sw a0, 0(sp) - // nal # Sets RA to point to the lw instruction below. 
- // move v0, ra - // lw ra, 12(sp) - // jr ra - // addiu sp, sp, 16 - RunCode(&codegenMIPS, graph, [](HGraph*) {}, false, 0); + EXPECT_EQ(codegen.GetFpuSpillSize(), kExpectedFPSpillSize); } + #endif } // namespace art diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index dde39d46f3..9fbd7d6fc0 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -19,8 +19,6 @@ #include "arch/arm/registers_arm.h" #include "arch/instruction_set.h" -#include "arch/mips/registers_mips.h" -#include "arch/mips64/registers_mips64.h" #include "arch/x86/registers_x86.h" #include "code_simulator.h" #include "code_simulator_container.h" @@ -45,14 +43,6 @@ #include "code_generator_x86_64.h" #endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "code_generator_mips.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "code_generator_mips64.h" -#endif - namespace art { typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); @@ -346,18 +336,6 @@ CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compi } #endif -#ifdef ART_ENABLE_CODEGEN_mips -CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { - return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { - return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, compiler_options); -} -#endif - } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 7d3af9521a..320915ee57 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,7 +17,6 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ -#include "dwarf/register.h" #include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" @@ -38,14 +37,6 @@ namespace helpers { static_assert(vixl::aarch32::kSpCode == SP, "vixl::aarch32::kSpCode must equal ART's SP"); -inline dwarf::Reg DWARFReg(vixl::aarch32::Register reg) { - return dwarf::Reg::ArmCore(static_cast<int>(reg.GetCode())); -} - -inline dwarf::Reg DWARFReg(vixl::aarch32::SRegister reg) { - return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode())); -} - inline vixl::aarch32::Register HighRegisterFrom(Location location) { DCHECK(location.IsRegisterPair()) << location; return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl::aarch32::Register>()); diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 5556f16740..41f284fad2 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -87,36 +87,41 @@ inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_in instr->InputAt(input_index)->GetType()); } -inline vixl::aarch64::FPRegister DRegisterFrom(Location location) { +inline vixl::aarch64::VRegister DRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; - return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg()); + return vixl::aarch64::VRegister::GetDRegFromCode(location.reg()); } -inline vixl::aarch64::FPRegister QRegisterFrom(Location location) { +inline vixl::aarch64::VRegister QRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; - return 
vixl::aarch64::FPRegister::GetQRegFromCode(location.reg()); + return vixl::aarch64::VRegister::GetQRegFromCode(location.reg()); } -inline vixl::aarch64::FPRegister VRegisterFrom(Location location) { +inline vixl::aarch64::VRegister VRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; - return vixl::aarch64::FPRegister::GetVRegFromCode(location.reg()); + return vixl::aarch64::VRegister::GetVRegFromCode(location.reg()); } -inline vixl::aarch64::FPRegister SRegisterFrom(Location location) { +inline vixl::aarch64::VRegister SRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; - return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg()); + return vixl::aarch64::VRegister::GetSRegFromCode(location.reg()); } -inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, DataType::Type type) { +inline vixl::aarch64::VRegister HRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()) << location; + return vixl::aarch64::VRegister::GetHRegFromCode(location.reg()); +} + +inline vixl::aarch64::VRegister FPRegisterFrom(Location location, DataType::Type type) { DCHECK(DataType::IsFloatingPointType(type)) << type; return type == DataType::Type::kFloat64 ? DRegisterFrom(location) : SRegisterFrom(location); } -inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) { +inline vixl::aarch64::VRegister OutputFPRegister(HInstruction* instr) { return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType()); } -inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) { +inline vixl::aarch64::VRegister InputFPRegisterAt(HInstruction* instr, int input_index) { return FPRegisterFrom(instr->GetLocations()->InAt(input_index), instr->InputAt(input_index)->GetType()); } @@ -201,7 +206,7 @@ inline Location LocationFrom(const vixl::aarch64::Register& reg) { return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode())); } -inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) { +inline Location LocationFrom(const vixl::aarch64::VRegister& fpreg) { return Location::FpuRegisterLocation(fpreg.GetCode()); } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 09e7cabfa4..2031707759 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -217,6 +217,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i } void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { + DataType::Type type = instruction->GetType(); HConstant* input_cst = instruction->GetConstantRight(); if ((input_cst != nullptr) && input_cst->IsZeroBitPattern()) { // Replace code looking like @@ -226,6 +227,25 @@ void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { instruction->ReplaceWith(input_cst); instruction->GetBlock()->RemoveInstruction(instruction); } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + + if (left->IsNot() ^ right->IsNot()) { + // Replace code looking like + // NOT notsrc, src + // AND dst, notsrc, src + // with + // CONSTANT 0 + HInstruction* hnot = (left->IsNot() ? left : right); + HInstruction* hother = (left->IsNot() ? 
right : left); + HInstruction* src = hnot->AsNot()->GetInput(); + + if (src == hother) { + instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } + } } void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) { diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc deleted file mode 100644 index 63a370a47b..0000000000 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "base/arena_allocator.h" -#include "code_generator_mips.h" -#include "optimizing_unit_test.h" -#include "parallel_move_resolver.h" -#include "utils/assembler_test_base.h" -#include "utils/mips/assembler_mips.h" - -#include "gtest/gtest.h" - -namespace art { - -class EmitSwapMipsTest : public OptimizingUnitTest { - public: - void SetUp() override { - instruction_set_ = InstructionSet::kMips; - instruction_set_features_ = MipsInstructionSetFeatures::FromCppDefines(); - OptimizingUnitTest::SetUp(); - graph_ = CreateGraph(); - codegen_.reset( - new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, *compiler_options_)); - moves_ = new (GetAllocator()) HParallelMove(GetAllocator()); - test_helper_.reset( - new AssemblerTestInfrastructure(GetArchitectureString(), - GetAssemblerCmdName(), - GetAssemblerParameters(), - GetObjdumpCmdName(), - GetObjdumpParameters(), - GetDisassembleCmdName(), - GetDisassembleParameters(), - GetAssemblyHeader())); - } - - void TearDown() override { - test_helper_.reset(); - codegen_.reset(); - graph_ = nullptr; - ResetPoolAndAllocator(); - OptimizingUnitTest::TearDown(); - } - - // Get the typically used name for this architecture. - std::string GetArchitectureString() { - return "mips"; - } - - // Get the name of the assembler. - std::string GetAssemblerCmdName() { - return "as"; - } - - // Switches to the assembler command. - std::string GetAssemblerParameters() { - return " --no-warn -32 -march=mips32r2"; - } - - // Get the name of the objdump. - std::string GetObjdumpCmdName() { - return "objdump"; - } - - // Switches to the objdump command. - std::string GetObjdumpParameters() { - return " -h"; - } - - // Get the name of the objdump. - std::string GetDisassembleCmdName() { - return "objdump"; - } - - // Switches to the objdump command. - std::string GetDisassembleParameters() { - return " -D -bbinary -mmips:isa32r2"; - } - - // No need for assembly header here. 
- const char* GetAssemblyHeader() { - return nullptr; - } - - void DriverWrapper(HParallelMove* move, - const std::string& assembly_text, - const std::string& test_name) { - codegen_->GetMoveResolver()->EmitNativeCode(move); - assembler_ = codegen_->GetAssembler(); - assembler_->FinalizeCode(); - std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(assembler_->CodeSize())); - MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); - test_helper_->Driver(*data, assembly_text, test_name); - } - - protected: - HGraph* graph_; - HParallelMove* moves_; - std::unique_ptr<mips::CodeGeneratorMIPS> codegen_; - mips::MipsAssembler* assembler_; - std::unique_ptr<AssemblerTestInfrastructure> test_helper_; -}; - -TEST_F(EmitSwapMipsTest, TwoRegisters) { - moves_->AddMove( - Location::RegisterLocation(4), - Location::RegisterLocation(5), - DataType::Type::kInt32, - nullptr); - moves_->AddMove( - Location::RegisterLocation(5), - Location::RegisterLocation(4), - DataType::Type::kInt32, - nullptr); - const char* expected = - "or $t8, $a1, $zero\n" - "or $a1, $a0, $zero\n" - "or $a0, $t8, $zero\n"; - DriverWrapper(moves_, expected, "TwoRegisters"); -} - -TEST_F(EmitSwapMipsTest, TwoRegisterPairs) { - moves_->AddMove( - Location::RegisterPairLocation(4, 5), - Location::RegisterPairLocation(6, 7), - DataType::Type::kInt64, - nullptr); - moves_->AddMove( - Location::RegisterPairLocation(6, 7), - Location::RegisterPairLocation(4, 5), - DataType::Type::kInt64, - nullptr); - const char* expected = - "or $t8, $a2, $zero\n" - "or $a2, $a0, $zero\n" - "or $a0, $t8, $zero\n" - "or $t8, $a3, $zero\n" - "or $a3, $a1, $zero\n" - "or $a1, $t8, $zero\n"; - DriverWrapper(moves_, expected, "TwoRegisterPairs"); -} - -TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) { - moves_->AddMove( - Location::FpuRegisterLocation(4), - Location::FpuRegisterLocation(2), - DataType::Type::kFloat32, - nullptr); - moves_->AddMove( - Location::FpuRegisterLocation(2), - Location::FpuRegisterLocation(4), - DataType::Type::kFloat32, - nullptr); - const char* expected = - "mov.s $f6, $f2\n" - "mov.s $f2, $f4\n" - "mov.s $f4, $f6\n"; - DriverWrapper(moves_, expected, "TwoFpuRegistersFloat"); -} - -TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) { - moves_->AddMove( - Location::FpuRegisterLocation(4), - Location::FpuRegisterLocation(2), - DataType::Type::kFloat64, - nullptr); - moves_->AddMove( - Location::FpuRegisterLocation(2), - Location::FpuRegisterLocation(4), - DataType::Type::kFloat64, - nullptr); - const char* expected = - "mov.d $f6, $f2\n" - "mov.d $f2, $f4\n" - "mov.d $f4, $f6\n"; - DriverWrapper(moves_, expected, "TwoFpuRegistersDouble"); -} - -TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) { - moves_->AddMove( - Location::RegisterLocation(4), - Location::FpuRegisterLocation(2), - DataType::Type::kFloat32, - nullptr); - moves_->AddMove( - Location::FpuRegisterLocation(2), - Location::RegisterLocation(4), - DataType::Type::kFloat32, - nullptr); - const char* expected = - "or $t8, $a0, $zero\n" - "mfc1 $a0, $f2\n" - "mtc1 $t8, $f2\n"; - DriverWrapper(moves_, expected, "RegisterAndFpuRegister"); -} - -TEST_F(EmitSwapMipsTest, RegisterPairAndFpuRegister) { - moves_->AddMove( - Location::RegisterPairLocation(4, 5), - Location::FpuRegisterLocation(4), - DataType::Type::kFloat64, - nullptr); - moves_->AddMove( - Location::FpuRegisterLocation(4), - Location::RegisterPairLocation(4, 5), - DataType::Type::kFloat64, - nullptr); - const char* expected = - "mfc1 $t8, $f4\n" - "mfc1 $at, $f5\n" - "mtc1 
$a0, $f4\n" - "mtc1 $a1, $f5\n" - "or $a0, $t8, $zero\n" - "or $a1, $at, $zero\n"; - DriverWrapper(moves_, expected, "RegisterPairAndFpuRegister"); -} - -TEST_F(EmitSwapMipsTest, TwoStackSlots) { - moves_->AddMove( - Location::StackSlot(52), - Location::StackSlot(48), - DataType::Type::kInt32, - nullptr); - moves_->AddMove( - Location::StackSlot(48), - Location::StackSlot(52), - DataType::Type::kInt32, - nullptr); - const char* expected = - "addiu $sp, $sp, -16\n" - "sw $v0, 0($sp)\n" - "lw $v0, 68($sp)\n" - "lw $t8, 64($sp)\n" - "sw $v0, 64($sp)\n" - "sw $t8, 68($sp)\n" - "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 16\n"; - DriverWrapper(moves_, expected, "TwoStackSlots"); -} - -TEST_F(EmitSwapMipsTest, TwoDoubleStackSlots) { - moves_->AddMove( - Location::DoubleStackSlot(56), - Location::DoubleStackSlot(48), - DataType::Type::kInt64, - nullptr); - moves_->AddMove( - Location::DoubleStackSlot(48), - Location::DoubleStackSlot(56), - DataType::Type::kInt64, - nullptr); - const char* expected = - "addiu $sp, $sp, -16\n" - "sw $v0, 0($sp)\n" - "lw $v0, 72($sp)\n" - "lw $t8, 64($sp)\n" - "sw $v0, 64($sp)\n" - "sw $t8, 72($sp)\n" - "lw $v0, 76($sp)\n" - "lw $t8, 68($sp)\n" - "sw $v0, 68($sp)\n" - "sw $t8, 76($sp)\n" - "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 16\n"; - DriverWrapper(moves_, expected, "TwoDoubleStackSlots"); -} - -TEST_F(EmitSwapMipsTest, RegisterAndStackSlot) { - moves_->AddMove( - Location::RegisterLocation(4), - Location::StackSlot(48), - DataType::Type::kInt32, - nullptr); - moves_->AddMove( - Location::StackSlot(48), - Location::RegisterLocation(4), - DataType::Type::kInt32, - nullptr); - const char* expected = - "or $t8, $a0, $zero\n" - "lw $a0, 48($sp)\n" - "sw $t8, 48($sp)\n"; - DriverWrapper(moves_, expected, "RegisterAndStackSlot"); -} - -TEST_F(EmitSwapMipsTest, RegisterPairAndDoubleStackSlot) { - moves_->AddMove( - Location::RegisterPairLocation(4, 5), - Location::DoubleStackSlot(32), - DataType::Type::kInt64, - nullptr); - moves_->AddMove( - Location::DoubleStackSlot(32), - Location::RegisterPairLocation(4, 5), - DataType::Type::kInt64, - nullptr); - const char* expected = - "or $t8, $a0, $zero\n" - "lw $a0, 32($sp)\n" - "sw $t8, 32($sp)\n" - "or $t8, $a1, $zero\n" - "lw $a1, 36($sp)\n" - "sw $t8, 36($sp)\n"; - DriverWrapper(moves_, expected, "RegisterPairAndDoubleStackSlot"); -} - -TEST_F(EmitSwapMipsTest, FpuRegisterAndStackSlot) { - moves_->AddMove( - Location::FpuRegisterLocation(4), - Location::StackSlot(48), - DataType::Type::kFloat32, - nullptr); - moves_->AddMove( - Location::StackSlot(48), - Location::FpuRegisterLocation(4), - DataType::Type::kFloat32, - nullptr); - const char* expected = - "mov.s $f6, $f4\n" - "lwc1 $f4, 48($sp)\n" - "swc1 $f6, 48($sp)\n"; - DriverWrapper(moves_, expected, "FpuRegisterAndStackSlot"); -} - -TEST_F(EmitSwapMipsTest, FpuRegisterAndDoubleStackSlot) { - moves_->AddMove( - Location::FpuRegisterLocation(4), - Location::DoubleStackSlot(48), - DataType::Type::kFloat64, - nullptr); - moves_->AddMove( - Location::DoubleStackSlot(48), - Location::FpuRegisterLocation(4), - DataType::Type::kFloat64, - nullptr); - const char* expected = - "mov.d $f6, $f4\n" - "ldc1 $f4, 48($sp)\n" - "sdc1 $f6, 48($sp)\n"; - DriverWrapper(moves_, expected, "FpuRegisterAndDoubleStackSlot"); -} - -} // namespace art diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 01d9603802..95cfe3ebdb 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -497,7 +497,7 @@ void 
GraphChecker::VisitInstruction(HInstruction* instruction) { } // Ensure that reference type instructions have reference type info. - if (instruction->GetType() == DataType::Type::kReference) { + if (check_reference_type_info_ && instruction->GetType() == DataType::Type::kReference) { if (!instruction->GetReferenceTypeInfo().IsValid()) { AddError(StringPrintf("Reference type instruction %s:%d does not have " "valid reference type information.", diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index d085609197..564b1377ec 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -95,6 +95,15 @@ class GraphChecker : public HGraphDelegateVisitor { } } + // Enable/Disable the reference type info check. + // + // Return: the previous status of the check. + bool SetRefTypeInfoCheckEnabled(bool value = true) { + bool old_value = check_reference_type_info_; + check_reference_type_info_ = value; + return old_value; + } + protected: // Report a new error. void AddError(const std::string& error) { @@ -111,6 +120,10 @@ class GraphChecker : public HGraphDelegateVisitor { const char* const dump_prefix_; ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; + // Whether to perform the reference type info check for instructions which use or produce + // object references, e.g. HNewInstance, HLoadClass. + // The default value is true. + bool check_reference_type_info_ = true; DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 2a7bbcb72f..d94c1fa2fb 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -22,6 +22,7 @@ #include <sstream> #include "art_method.h" +#include "base/intrusive_forward_list.h" #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" @@ -38,7 +39,6 @@ #include "scoped_thread_state_change-inl.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" -#include "utils/intrusive_forward_list.h" namespace art { @@ -113,16 +113,19 @@ class HGraphVisualizerDisassembler { const uint8_t* base_address, const uint8_t* end_address) : instruction_set_(instruction_set), disassembler_(nullptr) { - libart_disassembler_handle_ = - dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW); + constexpr const char* libart_disassembler_so_name = + kIsDebugBuild ? 
"libartd-disassembler.so" : "libart-disassembler.so"; + libart_disassembler_handle_ = dlopen(libart_disassembler_so_name, RTLD_NOW); if (libart_disassembler_handle_ == nullptr) { - LOG(WARNING) << "Failed to dlopen libart-disassembler: " << dlerror(); + LOG(ERROR) << "Failed to dlopen " << libart_disassembler_so_name << ": " << dlerror(); return; } + constexpr const char* create_disassembler_symbol = "create_disassembler"; create_disasm_prototype* create_disassembler = reinterpret_cast<create_disasm_prototype*>( - dlsym(libart_disassembler_handle_, "create_disassembler")); + dlsym(libart_disassembler_handle_, create_disassembler_symbol)); if (create_disassembler == nullptr) { - LOG(WARNING) << "Could not find create_disassembler entry: " << dlerror(); + LOG(ERROR) << "Could not find " << create_disassembler_symbol << " entry in " + << libart_disassembler_so_name << ": " << dlerror(); return; } // Reading the disassembly from 0x0 is easier, so we print relative diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 205077fb49..24d6e656c2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -465,6 +465,30 @@ static bool AlwaysThrows(const CompilerOptions& compiler_options, ArtMethod* met return throw_seen; } +ArtMethod* HInliner::FindActualCallTarget(HInvoke* invoke_instruction, bool* cha_devirtualize) { + ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod(); + DCHECK(resolved_method != nullptr); + + ArtMethod* actual_method = nullptr; + if (invoke_instruction->IsInvokeStaticOrDirect()) { + actual_method = resolved_method; + } else { + // Check if we can statically find the method. + actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); + } + + if (actual_method == nullptr) { + ArtMethod* method = TryCHADevirtualization(resolved_method); + if (method != nullptr) { + *cha_devirtualize = true; + actual_method = method; + LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod(); + } + } + + return actual_method; +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || invoke_instruction->IsInvokePolymorphic() || @@ -485,56 +509,42 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method"; return false; } - ArtMethod* actual_method = nullptr; - - if (invoke_instruction->IsInvokeStaticOrDirect()) { - actual_method = resolved_method; - } else { - // Check if we can statically find the method. - actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); - } bool cha_devirtualize = false; - if (actual_method == nullptr) { - ArtMethod* method = TryCHADevirtualization(resolved_method); - if (method != nullptr) { - cha_devirtualize = true; - actual_method = method; - LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod(); - } - } + ArtMethod* actual_method = FindActualCallTarget(invoke_instruction, &cha_devirtualize); - if (actual_method != nullptr) { - // Single target. - bool result = TryInlineAndReplace(invoke_instruction, - actual_method, - ReferenceTypeInfo::CreateInvalid(), - /* do_rtp= */ true, - cha_devirtualize); - if (result) { - // Successfully inlined. - if (!invoke_instruction->IsInvokeStaticOrDirect()) { - if (cha_devirtualize) { - // Add dependency due to devirtualization. We've assumed resolved_method - // has single implementation. 
- outermost_graph_->AddCHASingleImplementationDependency(resolved_method); - MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); - } else { - MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); - } + // If we didn't find a method, see if we can inline from the inline caches. + if (actual_method == nullptr) { + DCHECK(!invoke_instruction->IsInvokeStaticOrDirect()); + + return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method); + } + + // Single target. + bool result = TryInlineAndReplace(invoke_instruction, + actual_method, + ReferenceTypeInfo::CreateInvalid(), + /* do_rtp= */ true, + cha_devirtualize); + if (result) { + // Successfully inlined. + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + if (cha_devirtualize) { + // Add dependency due to devirtualization. We've assumed resolved_method + // has single implementation. + outermost_graph_->AddCHASingleImplementationDependency(resolved_method); + MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); + } else { + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); } - } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) { - // Set always throws property for non-inlined method call with single target - // (unless it was obtained through CHA, because that would imply we have - // to add the CHA dependency, which seems not worth it). - invoke_instruction->SetAlwaysThrows(true); } - return result; + } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) { + // Set always throws property for non-inlined method call with single target + // (unless it was obtained through CHA, because that would imply we have + // to add the CHA dependency, which seems not worth it). + invoke_instruction->SetAlwaysThrows(true); } - DCHECK(!invoke_instruction->IsInvokeStaticOrDirect()); - - // Try using inline caches. - return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method); + return result; } static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder( @@ -604,9 +614,8 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, switch (inline_cache_type) { case kInlineCacheNoData: { LOG_FAIL_NO_STAT() - << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " could not be statically determined"; + << "No inline cache information for call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()); return false; } @@ -693,9 +702,8 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( } std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile = - pci->GetMethod(caller_dex_file.GetLocation(), - caller_dex_file.GetLocationChecksum(), - caller_compilation_unit_.GetDexMethodIndex()); + pci->GetHotMethodInfo(MethodReference( + &caller_dex_file, caller_compilation_unit_.GetDexMethodIndex())); if (offline_profile == nullptr) { return kInlineCacheNoData; // no profile information for this invocation. 
} @@ -748,8 +756,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( } } if (!found) { - VLOG(compiler) << "Could not find profiled dex file: " - << offline_profile.dex_references[i].dex_location; + VLOG(compiler) << "Could not find profiled dex file: " << offline_profile.dex_references[i]; return kInlineCacheMissingTypes; } } @@ -1435,10 +1442,39 @@ static inline bool MayInline(const CompilerOptions& compiler_options, return true; } -bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, - ArtMethod* method, - ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) { +// Returns whether inlining is allowed based on ART semantics. +bool HInliner::IsInliningAllowed(ArtMethod* method, const CodeItemDataAccessor& accessor) const { + if (!accessor.HasCodeItem()) { + LOG_FAIL_NO_STAT() + << "Method " << method->PrettyMethod() << " is not inlined because it is native"; + return false; + } + + if (!method->IsCompilable()) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " has soft failures un-handled by the compiler, so it cannot be inlined"; + return false; + } + + if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; + return false; + } + + return true; +} + +// Returns whether ART supports inlining this method. +// +// Some methods are not supported because they have features for which inlining +// is not implemented. For example, we do not currently support inlining throw +// instructions into a try block. +bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction, + ArtMethod* method, + const CodeItemDataAccessor& accessor) const { if (method->IsProxyMethod()) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedProxy) << "Method " << method->PrettyMethod() @@ -1446,6 +1482,29 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } + if (accessor.TriesSize() != 0) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch) + << "Method " << method->PrettyMethod() << " is not inlined because of try block"; + return false; + } + + if (invoke_instruction->IsInvokeStaticOrDirect() && + invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { + // Case of a static method that cannot be inlined because it implicitly + // requires an initialization check of its declaring class. + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache) + << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; + return false; + } + + return true; +} + +// Returns whether our resource limits allow inlining this method. 
+bool HInliner::IsInliningBudgetAvailable(ArtMethod* method, + const CodeItemDataAccessor& accessor) const { if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget) << "Method " @@ -1454,8 +1513,26 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } + size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits(); + if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) + << "Method " << method->PrettyMethod() + << " is not inlined because its code item is too big: " + << accessor.InsnsSizeInCodeUnits() + << " > " + << inline_max_code_units; + return false; + } + + return true; +} + +bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, + ArtMethod* method, + ReferenceTypeInfo receiver_type, + HInstruction** return_replacement) { // Check whether we're allowed to inline. The outermost compilation unit is the relevant - // dex file here (though the transitivity of an inline chain would allow checking the calller). + // dex file here (though the transitivity of an inline chain would allow checking the caller). if (!MayInline(codegen_->GetCompilerOptions(), *method->GetDexFile(), *outer_compilation_unit_.GetDexFile())) { @@ -1473,60 +1550,22 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); - CodeItemDataAccessor accessor(method->DexInstructionData()); - if (!accessor.HasCodeItem()) { - LOG_FAIL_NO_STAT() - << "Method " << method->PrettyMethod() << " is not inlined because it is native"; - return false; - } - - size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits(); - if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) - << "Method " << method->PrettyMethod() - << " is not inlined because its code item is too big: " - << accessor.InsnsSizeInCodeUnits() - << " > " - << inline_max_code_units; - return false; - } - - if (accessor.TriesSize() != 0) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch) - << "Method " << method->PrettyMethod() << " is not inlined because of try block"; - return false; - } - - if (!method->IsCompilable()) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) - << "Method " << method->PrettyMethod() - << " has soft failures un-handled by the compiler, so it cannot be inlined"; + if (!IsInliningAllowed(method, accessor)) { return false; } - if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) - << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; + if (!IsInliningSupported(invoke_instruction, method, accessor)) { return false; } - if (invoke_instruction->IsInvokeStaticOrDirect() && - invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { - // Case of a static method that cannot be inlined because it implicitly - // requires an initialization check of its declaring class. 
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache) - << "Method " << method->PrettyMethod() - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + if (!IsInliningBudgetAvailable(method, accessor)) { return false; } if (!TryBuildAndInlineHelper( - invoke_instruction, method, receiver_type, same_dex_file, return_replacement)) { + invoke_instruction, method, receiver_type, return_replacement)) { return false; } @@ -1756,105 +1795,12 @@ static bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, Art return false; } -bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - ReferenceTypeInfo receiver_type, - bool same_dex_file, - HInstruction** return_replacement) { - DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); - ScopedObjectAccess soa(Thread::Current()); - const dex::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& callee_dex_file = *resolved_method->GetDexFile(); - uint32_t method_index = resolved_method->GetDexMethodIndex(); - CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); - ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); - Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), - caller_compilation_unit_.GetDexCache(), - handles_); - Handle<mirror::ClassLoader> class_loader = - NewHandleIfDifferent(resolved_method->GetDeclaringClass()->GetClassLoader(), - caller_compilation_unit_.GetClassLoader(), - handles_); - - Handle<mirror::Class> compiling_class = handles_->NewHandle(resolved_method->GetDeclaringClass()); - DexCompilationUnit dex_compilation_unit( - class_loader, - class_linker, - callee_dex_file, - code_item, - resolved_method->GetDeclaringClass()->GetDexClassDefIndex(), - method_index, - resolved_method->GetAccessFlags(), - /* verified_method= */ nullptr, - dex_cache, - compiling_class); - - InvokeType invoke_type = invoke_instruction->GetInvokeType(); - if (invoke_type == kInterface) { - // We have statically resolved the dispatch. To please the class linker - // at runtime, we change this call as if it was a virtual call. - invoke_type = kVirtual; - } - - bool caller_dead_reference_safe = graph_->IsDeadReferenceSafe(); - const dex::ClassDef& callee_class = resolved_method->GetClassDef(); - // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() - // is currently rarely true. - bool callee_dead_reference_safe = - annotations::HasDeadReferenceSafeAnnotation(callee_dex_file, callee_class) - && !annotations::MethodContainsRSensitiveAccess(callee_dex_file, callee_class, method_index); - - const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId(); - HGraph* callee_graph = new (graph_->GetAllocator()) HGraph( - graph_->GetAllocator(), - graph_->GetArenaStack(), - callee_dex_file, - method_index, - codegen_->GetCompilerOptions().GetInstructionSet(), - invoke_type, - callee_dead_reference_safe, - graph_->IsDebuggable(), - /* osr= */ false, - caller_instruction_counter); - callee_graph->SetArtMethod(resolved_method); - - // When they are needed, allocate `inline_stats_` on the Arena instead - // of on the stack, as Clang might produce a stack frame too large - // for this function, that would not fit the requirements of the - // `-Wframe-larger-than` option. 
- if (stats_ != nullptr) { - // Reuse one object for all inline attempts from this caller to keep Arena memory usage low. - if (inline_stats_ == nullptr) { - void* storage = graph_->GetAllocator()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc); - inline_stats_ = new (storage) OptimizingCompilerStats; - } else { - inline_stats_->Reset(); - } - } - HGraphBuilder builder(callee_graph, - code_item_accessor, - &dex_compilation_unit, - &outer_compilation_unit_, - codegen_, - inline_stats_, - resolved_method->GetQuickenedInfo(), - handles_); - - if (builder.BuildGraph() != kAnalysisSuccess) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCannotBuild) - << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be built, so cannot be inlined"; - return false; - } - - if (!RegisterAllocator::CanAllocateRegistersFor( - *callee_graph, codegen_->GetCompilerOptions().GetInstructionSet())) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator) - << "Method " << callee_dex_file.PrettyMethod(method_index) - << " cannot be inlined because of the register allocator"; - return false; - } - + // Substitutes parameters in the callee graph with their values from the caller. +void HInliner::SubstituteArguments(HGraph* callee_graph, + HInvoke* invoke_instruction, + ReferenceTypeInfo receiver_type, + const DexCompilationUnit& dex_compilation_unit) { + ArtMethod* const resolved_method = callee_graph->GetArtMethod(); size_t parameter_index = 0; bool run_rtp = false; for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions()); @@ -1897,8 +1843,23 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, handles_, /* is_first_run= */ false).Run(); } +} - RunOptimizations(callee_graph, code_item, dex_compilation_unit); +// Returns whether we can inline the callee_graph into the target_block. +// +// This performs a combination of semantics checks, compiler support checks, and +// resource limit checks. +// +// If this function returns true, it will also set out_number_of_instructions to +// the number of instructions in the inlined body. +bool HInliner::CanInlineBody(const HGraph* callee_graph, + const HBasicBlock* target_block, + size_t* out_number_of_instructions) const { + const DexFile& callee_dex_file = callee_graph->GetDexFile(); + ArtMethod* const resolved_method = callee_graph->GetArtMethod(); + const uint32_t method_index = resolved_method->GetMethodIndex(); + const bool same_dex_file = + IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile()); HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { @@ -1911,7 +1872,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, bool has_one_return = false; for (HBasicBlock* predecessor : exit_block->GetPredecessors()) { if (predecessor->GetLastInstruction()->IsThrow()) { - if (invoke_instruction->GetBlock()->IsTryBlock()) { + if (target_block->IsTryBlock()) { // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. 
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch) << "Method " << callee_dex_file.PrettyMethod(method_index) @@ -2020,6 +1981,111 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } } } + + *out_number_of_instructions = number_of_instructions; + return true; +} + +bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, + HInstruction** return_replacement) { + DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); + const dex::CodeItem* code_item = resolved_method->GetCodeItem(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); + CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), + caller_compilation_unit_.GetDexCache(), + handles_); + Handle<mirror::ClassLoader> class_loader = + NewHandleIfDifferent(resolved_method->GetDeclaringClass()->GetClassLoader(), + caller_compilation_unit_.GetClassLoader(), + handles_); + + Handle<mirror::Class> compiling_class = handles_->NewHandle(resolved_method->GetDeclaringClass()); + DexCompilationUnit dex_compilation_unit( + class_loader, + class_linker, + callee_dex_file, + code_item, + resolved_method->GetDeclaringClass()->GetDexClassDefIndex(), + method_index, + resolved_method->GetAccessFlags(), + /* verified_method= */ nullptr, + dex_cache, + compiling_class); + + InvokeType invoke_type = invoke_instruction->GetInvokeType(); + if (invoke_type == kInterface) { + // We have statically resolved the dispatch. To please the class linker + // at runtime, we change this call as if it was a virtual call. + invoke_type = kVirtual; + } + + bool caller_dead_reference_safe = graph_->IsDeadReferenceSafe(); + const dex::ClassDef& callee_class = resolved_method->GetClassDef(); + // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() + // is currently rarely true. + bool callee_dead_reference_safe = + annotations::HasDeadReferenceSafeAnnotation(callee_dex_file, callee_class) + && !annotations::MethodContainsRSensitiveAccess(callee_dex_file, callee_class, method_index); + + const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId(); + HGraph* callee_graph = new (graph_->GetAllocator()) HGraph( + graph_->GetAllocator(), + graph_->GetArenaStack(), + callee_dex_file, + method_index, + codegen_->GetCompilerOptions().GetInstructionSet(), + invoke_type, + callee_dead_reference_safe, + graph_->IsDebuggable(), + /* osr= */ false, + /* is_shared_jit_code= */ graph_->IsCompilingForSharedJitCode(), + /* baseline= */ graph_->IsCompilingBaseline(), + /* start_instruction_id= */ caller_instruction_counter); + callee_graph->SetArtMethod(resolved_method); + + // When they are needed, allocate `inline_stats_` on the Arena instead + // of on the stack, as Clang might produce a stack frame too large + // for this function, that would not fit the requirements of the + // `-Wframe-larger-than` option. + if (stats_ != nullptr) { + // Reuse one object for all inline attempts from this caller to keep Arena memory usage low. 
+    if (inline_stats_ == nullptr) {
+      void* storage = graph_->GetAllocator()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
+      inline_stats_ = new (storage) OptimizingCompilerStats;
+    } else {
+      inline_stats_->Reset();
+    }
+  }
+  HGraphBuilder builder(callee_graph,
+                        code_item_accessor,
+                        &dex_compilation_unit,
+                        &outer_compilation_unit_,
+                        codegen_,
+                        inline_stats_,
+                        resolved_method->GetQuickenedInfo(),
+                        handles_);
+
+  if (builder.BuildGraph() != kAnalysisSuccess) {
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCannotBuild)
+        << "Method " << callee_dex_file.PrettyMethod(method_index)
+        << " could not be built, so cannot be inlined";
+    return false;
+  }
+
+  SubstituteArguments(callee_graph, invoke_instruction, receiver_type, dex_compilation_unit);
+
+  RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+
+  size_t number_of_instructions = 0;
+  if (!CanInlineBody(callee_graph, invoke_instruction->GetBlock(), &number_of_instructions)) {
+    return false;
+  }
+
   DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
       << "No instructions can be added to the outer graph while inner graph is being built";
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 15d7349694..882ba4e58b 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -73,6 +73,15 @@ class HInliner : public HOptimization {
 
   bool TryInline(HInvoke* invoke_instruction);
 
+  // Attempt to resolve the target of the invoke instruction to an actual call
+  // target.
+  //
+  // Returns the target directly in the case of static or direct invokes.
+  // Otherwise, uses CHA devirtualization or other methods to try to find the
+  // call target.
+  ArtMethod* FindActualCallTarget(HInvoke* invoke_instruction, bool* cha_devirtualize)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
   // reference type propagation can run after the inlining. If the inlining is successful, this
   // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
@@ -93,8 +102,15 @@ class HInliner : public HOptimization {
   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
                                ReferenceTypeInfo receiver_type,
-                               bool same_dex_file,
-                               HInstruction** return_replacement);
+                               HInstruction** return_replacement)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Substitutes parameters in the callee graph with their values from the caller.
+  void SubstituteArguments(HGraph* callee_graph,
+                           HInvoke* invoke_instruction,
+                           ReferenceTypeInfo receiver_type,
+                           const DexCompilationUnit& dex_compilation_unit)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Run simple optimizations on `callee_graph`.
   void RunOptimizations(HGraph* callee_graph,
@@ -108,6 +124,38 @@ class HInliner : public HOptimization {
                            HInstruction** return_replacement)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns whether inlining is allowed based on ART semantics.
+  bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+
+  // Returns whether ART supports inlining this method.
+  //
+  // Some methods are not supported because they have features for which inlining
+  // is not implemented. For example, we do not currently support inlining throw
+  // instructions into a try block.
+ bool IsInliningSupported(const HInvoke* invoke_instruction, + art::ArtMethod* method, + const CodeItemDataAccessor& accessor) const + REQUIRES_SHARED(Locks::mutator_lock_); + + // Returns whether the inlining budget allows inlining method. + // + // For example, this checks whether the function has grown too large and + // inlining should be prevented. + bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const + REQUIRES_SHARED(Locks::mutator_lock_); + + // Inspects the body of a method (callee_graph) and returns whether it can be + // inlined. + // + // This checks for instructions and constructs that we do not support + // inlining, such as inlining a throw instruction into a try block. + bool CanInlineBody(const HGraph* callee_graph, + const HBasicBlock* target_block, + size_t* out_number_of_instructions) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Create a new HInstanceFieldGet. HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index, ArtMethod* referrer, diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 5e7b57523f..1e7b48e4a5 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -29,6 +29,7 @@ #include "driver/dex_compilation_unit.h" #include "driver/compiler_options.h" #include "imtable-inl.h" +#include "jit/jit.h" #include "mirror/dex_cache.h" #include "oat_file.h" #include "optimizing_compiler_stats.h" @@ -1001,14 +1002,27 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method->GetMethodIndex()); } else { DCHECK_EQ(invoke_type, kInterface); - ScopedObjectAccess soa(Thread::Current()); // Needed for the IMT index. - invoke = new (allocator_) HInvokeInterface(allocator_, + ScopedObjectAccess soa(Thread::Current()); // Needed for the IMT index and class check below. + if (resolved_method->GetDeclaringClass()->IsObjectClass()) { + // If the resolved method is from j.l.Object, emit a virtual call instead. + // The IMT conflict stub only handles interface methods. + invoke = new (allocator_) HInvokeVirtual(allocator_, number_of_arguments, return_type, dex_pc, method_idx, resolved_method, - ImTable::GetImtIndex(resolved_method)); + resolved_method->GetMethodIndex()); + } else { + DCHECK(resolved_method->GetDeclaringClass()->IsInterface()); + invoke = new (allocator_) HInvokeInterface(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + resolved_method, + ImTable::GetImtIndex(resolved_method)); + } } return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false, clinit_check); } @@ -1139,12 +1153,15 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc static bool IsInBootImage(ObjPtr<mirror::Class> cls, const CompilerOptions& compiler_options) REQUIRES_SHARED(Locks::mutator_lock_) { - if (compiler_options.IsBootImage()) { + if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(cls)) { + return true; + } + if (compiler_options.IsBootImage() || compiler_options.IsBootImageExtension()) { std::string temp; const char* descriptor = cls->GetDescriptor(&temp); return compiler_options.IsImageClass(descriptor); } else { - return Runtime::Current()->GetHeap()->FindSpaceFromObject(cls, false)->IsImageSpace(); + return false; } } @@ -1290,15 +1307,20 @@ bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const { // Check if the class will be initialized at runtime. 
if (cls->IsInitialized()) { Runtime* runtime = Runtime::Current(); - if (!runtime->IsAotCompiler()) { + if (runtime->IsAotCompiler()) { + // Assume loaded only if klass is in the boot image. App classes cannot be assumed + // loaded because we don't even know what class loader will be used to load them. + if (IsInBootImage(cls.Get(), code_generator_->GetCompilerOptions())) { + return true; + } + } else { DCHECK(runtime->UseJitCompilation()); - // For JIT, the class cannot revert to an uninitialized state. - return true; - } - // Assume loaded only if klass is in the boot image. App classes cannot be assumed - // loaded because we don't even know what class loader will be used to load them. - if (IsInBootImage(cls.Get(), code_generator_->GetCompilerOptions())) { - return true; + if (Runtime::Current()->GetJit()->CanAssumeInitialized( + cls.Get(), + graph_->IsCompilingForSharedJitCode())) { + // For JIT, the class cannot revert to an uninitialized state. + return true; + } } } @@ -3090,6 +3112,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference), HMonitorOperation::OperationKind::kEnter, dex_pc)); + graph_->SetHasMonitorOperations(true); break; } @@ -3098,6 +3121,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference), HMonitorOperation::OperationKind::kExit, dex_pc)); + graph_->SetHasMonitorOperations(true); break; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index ce62495ae1..84297ec557 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -25,6 +25,7 @@ #include "mirror/class-inl.h" #include "scoped_thread_state_change-inl.h" #include "sharpening.h" +#include "string_builder_append.h" namespace art { @@ -275,17 +276,6 @@ bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) return false; } break; - case InstructionSet::kMips: - case InstructionSet::kMips64: - if (!(type == DataType::Type::kUint8 || - type == DataType::Type::kInt8 || - type == DataType::Type::kUint16 || - type == DataType::Type::kInt16 || - type == DataType::Type::kInt32 || - type == DataType::Type::kInt64)) { - return false; - } - break; default: return false; } @@ -2467,6 +2457,192 @@ static bool NoEscapeForStringBufferReference(HInstruction* reference, HInstructi return false; } +static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { + DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringBuilderToString); + if (invoke->CanThrowIntoCatchBlock()) { + return false; + } + + HBasicBlock* block = invoke->GetBlock(); + HInstruction* sb = invoke->InputAt(0); + + // We support only a new StringBuilder, otherwise we cannot ensure that + // the StringBuilder data does not need to be populated for other users. + if (!sb->IsNewInstance()) { + return false; + } + + // For now, we support only single-block recognition. + // (Ternary operators feeding the append could be implemented.) + for (const HUseListNode<HInstruction*>& use : sb->GetUses()) { + if (use.GetUser()->GetBlock() != block) { + return false; + } + // The append pattern uses the StringBuilder only as the first argument. + if (use.GetIndex() != 0u) { + return false; + } + } + + // Collect args and check for unexpected uses. 
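+  // (A typical candidate is code shaped like `new StringBuilder().append(x).append(y).toString()`,
+  //  with the whole chain in a single block; `x` and `y` are just illustrative values.)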
+ // We expect one call to a constructor with no arguments, one constructor fence (unless + // eliminated), some number of append calls and one call to StringBuilder.toString(). + bool seen_constructor = false; + bool seen_constructor_fence = false; + bool seen_to_string = false; + uint32_t format = 0u; + uint32_t num_args = 0u; + HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order. + for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) { + HInstruction* user = iter.Current(); + // Instructions of interest apply to `sb`, skip those that do not involve `sb`. + if (user->InputCount() == 0u || user->InputAt(0u) != sb) { + continue; + } + // We visit the uses in reverse order, so the StringBuilder.toString() must come first. + if (!seen_to_string) { + if (user == invoke) { + seen_to_string = true; + continue; + } else { + return false; + } + } + // Then we should see the arguments. + if (user->IsInvokeVirtual()) { + HInvokeVirtual* as_invoke_virtual = user->AsInvokeVirtual(); + DCHECK(!seen_constructor); + DCHECK(!seen_constructor_fence); + StringBuilderAppend::Argument arg; + switch (as_invoke_virtual->GetIntrinsic()) { + case Intrinsics::kStringBuilderAppendObject: + // TODO: Unimplemented, needs to call String.valueOf(). + return false; + case Intrinsics::kStringBuilderAppendString: + arg = StringBuilderAppend::Argument::kString; + break; + case Intrinsics::kStringBuilderAppendCharArray: + // TODO: Unimplemented, StringBuilder.append(char[]) can throw NPE and we would + // not have the correct stack trace for it. + return false; + case Intrinsics::kStringBuilderAppendBoolean: + arg = StringBuilderAppend::Argument::kBoolean; + break; + case Intrinsics::kStringBuilderAppendChar: + arg = StringBuilderAppend::Argument::kChar; + break; + case Intrinsics::kStringBuilderAppendInt: + arg = StringBuilderAppend::Argument::kInt; + break; + case Intrinsics::kStringBuilderAppendLong: + arg = StringBuilderAppend::Argument::kLong; + break; + case Intrinsics::kStringBuilderAppendCharSequence: { + ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo(); + if (!rti.IsValid()) { + return false; + } + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> input_type = rti.GetTypeHandle(); + DCHECK(input_type != nullptr); + if (input_type.Get() == GetClassRoot<mirror::String>()) { + arg = StringBuilderAppend::Argument::kString; + } else { + // TODO: Check and implement for StringBuilder. We could find the StringBuilder's + // internal char[] inconsistent with the length, or the string compression + // of the result could be compromised with a concurrent modification, and + // we would need to throw appropriate exceptions. + return false; + } + break; + } + case Intrinsics::kStringBuilderAppendFloat: + case Intrinsics::kStringBuilderAppendDouble: + // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter(). + return false; + default: { + return false; + } + } + // Uses of the append return value should have been replaced with the first input. 
+ DCHECK(!as_invoke_virtual->HasUses()); + DCHECK(!as_invoke_virtual->HasEnvironmentUses()); + if (num_args == StringBuilderAppend::kMaxArgs) { + return false; + } + format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg); + args[num_args] = as_invoke_virtual->InputAt(1u); + ++num_args; + } else if (user->IsInvokeStaticOrDirect() && + user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr && + user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() && + user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) { + // After arguments, we should see the constructor. + // We accept only the constructor with no extra arguments. + DCHECK(!seen_constructor); + DCHECK(!seen_constructor_fence); + seen_constructor = true; + } else if (user->IsConstructorFence()) { + // The last use we see is the constructor fence. + DCHECK(seen_constructor); + DCHECK(!seen_constructor_fence); + seen_constructor_fence = true; + } else { + return false; + } + } + + if (num_args == 0u) { + return false; + } + + // Check environment uses. + for (const HUseListNode<HEnvironment*>& use : sb->GetEnvUses()) { + HInstruction* holder = use.GetUser()->GetHolder(); + if (holder->GetBlock() != block) { + return false; + } + // Accept only calls on the StringBuilder (which shall all be removed). + // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)? + if (holder->InputCount() == 0 || holder->InputAt(0) != sb) { + return false; + } + } + + // Create replacement instruction. + HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format)); + ArenaAllocator* allocator = block->GetGraph()->GetAllocator(); + HStringBuilderAppend* append = + new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc()); + append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo()); + for (size_t i = 0; i != num_args; ++i) { + append->SetArgumentAt(i, args[num_args - 1u - i]); + } + block->InsertInstructionBefore(append, invoke); + DCHECK(!invoke->CanBeNull()); + DCHECK(!append->CanBeNull()); + invoke->ReplaceWith(append); + // Copy environment, except for the StringBuilder uses. + for (HEnvironment* env = invoke->GetEnvironment(); env != nullptr; env = env->GetParent()) { + for (size_t i = 0, size = env->Size(); i != size; ++i) { + if (env->GetInstructionAt(i) == sb) { + env->RemoveAsUserOfInput(i); + env->SetRawEnvAt(i, /*instruction=*/ nullptr); + } + } + } + append->CopyEnvironmentFrom(invoke->GetEnvironment()); + // Remove the old instruction. + block->RemoveInstruction(invoke); + // Remove the StringBuilder's uses and StringBuilder. + while (sb->HasNonEnvironmentUses()) { + block->RemoveInstruction(sb->GetUses().front().GetUser()); + } + DCHECK(!sb->HasEnvironmentUses()); + block->RemoveInstruction(sb); + return true; +} + // Certain allocation intrinsics are not removed by dead code elimination // because of potentially throwing an OOM exception or other side effects. // This method removes such intrinsics when special circumstances allow. 
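[Illustration, not part of the patch: the `format` word built above packs one StringBuilderAppend::Argument kind per field of kBitsPerArg bits while the block is walked backwards. The sketch below uses assumed stand-in values for kBitsPerArg/kArgMask (the real constants are defined in string_builder_append.h) to show why the first append ends up in the least significant field and why `args` is collected in reverse and re-reversed via SetArgumentAt().]

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-ins for StringBuilderAppend::kBitsPerArg / kArgMask (assumed values,
// for illustration only).
constexpr uint32_t kBitsPerArg = 4;
constexpr uint32_t kArgMask = (1u << kBitsPerArg) - 1u;

// The simplifier visits the block backwards, so the last append is folded in
// first; each later fold shifts it left, leaving the first append in the
// least significant field.
uint32_t EncodeFormat(const std::vector<uint32_t>& kinds_in_reverse_order) {
  uint32_t format = 0u;
  for (uint32_t kind : kinds_in_reverse_order) {
    format = (format << kBitsPerArg) | kind;
  }
  return format;
}

// Decoding therefore reads the low field first, yielding argument kinds in
// program (append) order, which is the order the packed arguments are stored
// in after re-reversing `args`.
std::vector<uint32_t> DecodeFormat(uint32_t format, size_t num_args) {
  std::vector<uint32_t> kinds(num_args);
  for (size_t i = 0; i != num_args; ++i) {
    kinds[i] = format & kArgMask;
    format >>= kBitsPerArg;
  }
  return kinds;
}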
@@ -2481,6 +2657,9 @@ void InstructionSimplifierVisitor::SimplifyAllocationIntrinsic(HInvoke* invoke) invoke->GetBlock()->RemoveInstruction(invoke); RecordSimplification(); } + } else if (invoke->GetIntrinsic() == Intrinsics::kStringBuilderToString && + TryReplaceStringBuilderAppend(invoke)) { + RecordSimplification(); } } @@ -2569,7 +2748,16 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifyNPEOnArgN(instruction, 1); // 0th has own NullCheck break; case Intrinsics::kStringBufferAppend: - case Intrinsics::kStringBuilderAppend: + case Intrinsics::kStringBuilderAppendObject: + case Intrinsics::kStringBuilderAppendString: + case Intrinsics::kStringBuilderAppendCharSequence: + case Intrinsics::kStringBuilderAppendCharArray: + case Intrinsics::kStringBuilderAppendBoolean: + case Intrinsics::kStringBuilderAppendChar: + case Intrinsics::kStringBuilderAppendInt: + case Intrinsics::kStringBuilderAppendLong: + case Intrinsics::kStringBuilderAppendFloat: + case Intrinsics::kStringBuilderAppendDouble: SimplifyReturnThis(instruction); break; case Intrinsics::kStringBufferToString: diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc deleted file mode 100644 index 5d0c63b76b..0000000000 --- a/compiler/optimizing/instruction_simplifier_mips.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2017 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "instruction_simplifier_mips.h" - -#include "arch/mips/instruction_set_features_mips.h" -#include "mirror/array-inl.h" - -namespace art { -namespace mips { - -class InstructionSimplifierMipsVisitor : public HGraphVisitor { - public: - InstructionSimplifierMipsVisitor(HGraph* graph, - CodeGenerator* codegen, - OptimizingCompilerStats* stats) - : HGraphVisitor(graph), - stats_(stats), - codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {} - - private: - void RecordSimplification() { - MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); - } - - bool TryExtractArrayAccessIndex(HInstruction* access, - HInstruction* index, - DataType::Type packed_type); - void VisitArrayGet(HArrayGet* instruction) override; - void VisitArraySet(HArraySet* instruction) override; - - OptimizingCompilerStats* stats_; - CodeGeneratorMIPS* codegen_; -}; - -bool InstructionSimplifierMipsVisitor::TryExtractArrayAccessIndex(HInstruction* access, - HInstruction* index, - DataType::Type packed_type) { - if (codegen_->GetInstructionSetFeatures().IsR6() || - codegen_->GetInstructionSetFeatures().HasMsa()) { - return false; - } - if (index->IsConstant() || - (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { - // If index is constant the whole address calculation often can be done by load/store - // instructions themselves. - // TODO: Treat the case with non-embeddable constants. 
- return false; - } - - if (packed_type != DataType::Type::kInt16 && packed_type != DataType::Type::kUint16 && - packed_type != DataType::Type::kInt32 && packed_type != DataType::Type::kInt64 && - packed_type != DataType::Type::kFloat32 && packed_type != DataType::Type::kFloat64) { - return false; - } - - if (access->IsArrayGet() && access->AsArrayGet()->IsStringCharAt()) { - return false; - } - - HGraph* graph = access->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetAllocator(); - size_t component_shift = DataType::SizeShift(packed_type); - - bool is_extracting_beneficial = false; - // It is beneficial to extract index intermediate address only if there are at least 2 users. - for (const HUseListNode<HInstruction*>& use : index->GetUses()) { - HInstruction* user = use.GetUser(); - if (user->IsArrayGet() && user != access && !user->AsArrayGet()->IsStringCharAt()) { - HArrayGet* another_access = user->AsArrayGet(); - DataType::Type another_packed_type = another_access->GetType(); - size_t another_component_shift = DataType::SizeShift(another_packed_type); - if (another_component_shift == component_shift) { - is_extracting_beneficial = true; - break; - } - } else if (user->IsArraySet() && user != access) { - HArraySet* another_access = user->AsArraySet(); - DataType::Type another_packed_type = another_access->GetType(); - size_t another_component_shift = DataType::SizeShift(another_packed_type); - if (another_component_shift == component_shift) { - is_extracting_beneficial = true; - break; - } - } else if (user->IsIntermediateArrayAddressIndex()) { - HIntermediateArrayAddressIndex* another_access = user->AsIntermediateArrayAddressIndex(); - size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue(); - if (another_component_shift == component_shift) { - is_extracting_beneficial = true; - break; - } - } - } - - if (!is_extracting_beneficial) { - return false; - } - - HIntConstant* shift = graph->GetIntConstant(component_shift); - HIntermediateArrayAddressIndex* address = - new (allocator) HIntermediateArrayAddressIndex(index, shift, kNoDexPc); - access->GetBlock()->InsertInstructionBefore(address, access); - access->ReplaceInput(address, 1); - return true; -} - -void InstructionSimplifierMipsVisitor::VisitArrayGet(HArrayGet* instruction) { - DataType::Type packed_type = instruction->GetType(); - if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) { - RecordSimplification(); - } -} - -void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) { - DataType::Type packed_type = instruction->GetComponentType(); - if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) { - RecordSimplification(); - } -} - -bool InstructionSimplifierMips::Run() { - InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_); - visitor.VisitReversePostOrder(); - return true; -} - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h deleted file mode 100644 index b431334811..0000000000 --- a/compiler/optimizing/instruction_simplifier_mips.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2017 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ - -#include "nodes.h" -#include "optimization.h" -#include "code_generator_mips.h" - -namespace art { - -class CodeGenerator; - -namespace mips { - -class InstructionSimplifierMips : public HOptimization { - public: - InstructionSimplifierMips(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierMipsPassName, stats), - codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {} - - static constexpr const char* kInstructionSimplifierMipsPassName = "instruction_simplifier_mips"; - - bool Run() override; - - private: - CodeGeneratorMIPS* codegen_; -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc index c345624a7a..5f6f562161 100644 --- a/compiler/optimizing/intrinsic_objects.cc +++ b/compiler/optimizing/intrinsic_objects.cc @@ -17,18 +17,18 @@ #include "intrinsic_objects.h" #include "art_field-inl.h" +#include "base/casts.h" #include "base/logging.h" -#include "class_root.h" -#include "handle.h" +#include "image.h" #include "obj_ptr-inl.h" -#include "mirror/object_array-alloc-inl.h" -#include "mirror/object_array-inl.h" namespace art { -static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* self, - ClassLinker* class_linker) - REQUIRES_SHARED(Locks::mutator_lock_) { +static constexpr size_t kIntrinsicObjectsOffset = + enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart); + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::LookupIntegerCache( + Thread* self, ClassLinker* class_linker) { ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass( self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr); if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) { @@ -44,47 +44,24 @@ static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* se return integer_cache; } -ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::AllocateBootImageLiveObjects( - Thread* self, - ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_) { - // The objects used for the Integer.valueOf() intrinsic must remain live even if references - // to them are removed using reflection. Image roots are not accessible through reflection, - // so the array we construct here shall keep them alive. - StackHandleScope<1> hs(self); - Handle<mirror::ObjectArray<mirror::Object>> integer_cache = - hs.NewHandle(LookupIntegerCache(self, class_linker)); - size_t live_objects_size = - (integer_cache != nullptr) ? 
(/* cache */ 1u + integer_cache->GetLength()) : 0u; - ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects = - mirror::ObjectArray<mirror::Object>::Alloc( - self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(class_linker), live_objects_size); - int32_t index = 0; - if (integer_cache != nullptr) { - live_objects->Set(index++, integer_cache.Get()); - for (int32_t i = 0, length = integer_cache->GetLength(); i != length; ++i) { - live_objects->Set(index++, integer_cache->Get(i)); - } - } - CHECK_EQ(index, live_objects->GetLength()); - - if (kIsDebugBuild && integer_cache != nullptr) { - CHECK_EQ(integer_cache.Get(), GetIntegerValueOfCache(live_objects)); - for (int32_t i = 0, len = integer_cache->GetLength(); i != len; ++i) { - CHECK_EQ(integer_cache->GetWithoutChecks(i), GetIntegerValueOfObject(live_objects, i)); - } - } - return live_objects; +static bool HasIntrinsicObjects( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(boot_image_live_objects != nullptr); + uint32_t length = static_cast<uint32_t>(boot_image_live_objects->GetLength()); + DCHECK_GE(length, kIntrinsicObjectsOffset); + return length != kIntrinsicObjectsOffset; } ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { - DCHECK(boot_image_live_objects != nullptr); - if (boot_image_live_objects->GetLength() == 0u) { + if (!HasIntrinsicObjects(boot_image_live_objects)) { return nullptr; // No intrinsic objects. } // No need for read barrier for boot image object or for verifying the value that was just stored. ObjPtr<mirror::Object> result = - boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(0); + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( + kIntrinsicObjectsOffset); DCHECK(result != nullptr); DCHECK(result->IsObjectArray()); DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;")); @@ -94,15 +71,14 @@ ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfC ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, uint32_t index) { - DCHECK(boot_image_live_objects != nullptr); - DCHECK_NE(boot_image_live_objects->GetLength(), 0); + DCHECK(HasIntrinsicObjects(boot_image_live_objects)); DCHECK_LT(index, static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength())); // No need for read barrier for boot image object or for verifying the value that was just stored. 
ObjPtr<mirror::Object> result = boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( - /* skip the IntegerCache.cache */ 1u + index); + kIntrinsicObjectsOffset + /* skip the IntegerCache.cache */ 1u + index); DCHECK(result != nullptr); DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;")); return result; @@ -110,8 +86,9 @@ ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { - DCHECK_NE(boot_image_live_objects->GetLength(), 0); - MemberOffset result = mirror::ObjectArray<mirror::Object>::OffsetOfElement(1u); + DCHECK(HasIntrinsicObjects(boot_image_live_objects)); + MemberOffset result = + mirror::ObjectArray<mirror::Object>::OffsetOfElement(kIntrinsicObjectsOffset + 1u); DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u), (boot_image_live_objects ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result))); diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h index 863017be38..ed764bd4b2 100644 --- a/compiler/optimizing/intrinsic_objects.h +++ b/compiler/optimizing/intrinsic_objects.h @@ -55,11 +55,9 @@ class IntrinsicObjects { return IndexField::Decode(intrinsic_data); } - static ObjPtr<mirror::ObjectArray<mirror::Object>> AllocateBootImageLiveObjects( - Thread* self, - ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); - // Functions for retrieving data for Integer.valueOf(). + static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( + Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ec5d17a443..d88e034810 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -54,6 +54,7 @@ using helpers::RegisterFrom; using helpers::SRegisterFrom; using helpers::WRegisterFrom; using helpers::XRegisterFrom; +using helpers::HRegisterFrom; using helpers::InputRegisterAt; using helpers::OutputRegister; @@ -90,8 +91,8 @@ static void MoveFromReturnRegister(Location trg, Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); __ Mov(trg_reg, res_reg, kDiscardForSameWReg); } else { - FPRegister trg_reg = FPRegisterFrom(trg, type); - FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type); + VRegister trg_reg = FPRegisterFrom(trg, type); + VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type); __ Fmov(trg_reg, res_reg); } } @@ -299,6 +300,14 @@ static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + static void GenReverseBytes(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -435,7 +444,7 @@ static void GenBitCount(HInvoke* 
instr, DataType::Type type, MacroAssembler* mas Register src = InputRegisterAt(instr, 0); Register dst = RegisterFrom(instr->GetLocations()->Out(), type); - FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS(); + VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS(); __ Fmov(fpr, src); __ Cnt(fpr.V8B(), fpr.V8B()); @@ -591,8 +600,8 @@ static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAs // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2. // If we were using this instruction, for most inputs, more handling code would be needed. LocationSummary* l = invoke->GetLocations(); - FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0)); - FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0)); + VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0)); + VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0)); Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out()); vixl::aarch64::Label done; @@ -1960,7 +1969,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register tmp2 = temps.AcquireX(); vixl::aarch64::Label done; - vixl::aarch64::Label compressed_string_loop; + vixl::aarch64::Label compressed_string_vector_loop; + vixl::aarch64::Label compressed_string_remainder; __ Sub(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. __ Cbz(num_chr, &done); @@ -2013,16 +2023,39 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ B(&done); if (mirror::kUseStringCompression) { + // For compressed strings, acquire a SIMD temporary register. + VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize); const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); __ Bind(&compressed_string_preloop); __ Add(src_ptr, src_ptr, Operand(srcBegin)); - // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. - __ Bind(&compressed_string_loop); + + // Save repairing the value of num_chr on the < 8 character path. + __ Subs(tmp1, num_chr, 8); + __ B(lt, &compressed_string_remainder); + + // Keep the result of the earlier subs, we are going to fetch at least 8 characters. + __ Mov(num_chr, tmp1); + + // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time. + // Uses SIMD instructions. + __ Bind(&compressed_string_vector_loop); + __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex)); + __ Subs(num_chr, num_chr, 8); + __ Uxtl(vtmp1.V8H(), vtmp1.V8B()); + __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex)); + __ B(ge, &compressed_string_vector_loop); + + __ Adds(num_chr, num_chr, 8); + __ B(eq, &done); + + // Loop for < 8 character case and remainder handling with a compressed src. + // Copies 1 character (8-bit) to (16-bit) at a time. 
+ __ Bind(&compressed_string_remainder); __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); __ Subs(num_chr, num_chr, Operand(1)); - __ B(gt, &compressed_string_loop); + __ B(gt, &compressed_string_remainder); } __ Bind(&done); @@ -2796,22 +2829,25 @@ static void GenIsInfinite(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { Operand infinity; + Operand tst_mask; Register out; if (is64bit) { infinity = kPositiveInfinityDouble; + tst_mask = MaskLeastSignificant<uint64_t>(63); out = XRegisterFrom(locations->Out()); } else { infinity = kPositiveInfinityFloat; + tst_mask = MaskLeastSignificant<uint32_t>(31); out = WRegisterFrom(locations->Out()); } - const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out); - MoveFPToInt(locations, is64bit, masm); + // Checks whether exponent bits are all 1 and fraction bits are all 0. __ Eor(out, out, infinity); - // We don't care about the sign bit, so shift left. - __ Cmp(zero, Operand(out, LSL, 1)); + // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff + // depending on is64bit. + __ Tst(out, tst_mask); __ Cset(out, eq); } @@ -3169,6 +3205,203 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); } +void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + LocationSummary* locations = new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasFP16()); + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope scratch_scope(masm); + Register bits = InputRegisterAt(invoke, 0); + VRegister out = SRegisterFrom(invoke->GetLocations()->Out()); + VRegister half = scratch_scope.AcquireH(); + __ Fmov(half, bits); // ARMv8.2 + __ Fcvt(out, half); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + LocationSummary* locations = new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasFP16()); + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope scratch_scope(masm); + VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0)); + VRegister half = scratch_scope.AcquireH(); + Register out = WRegisterFrom(invoke->GetLocations()->Out()); + __ Fcvt(half, in); + __ Fmov(out, half); + __ Sxth(out, out); // sign extend due to returning a short type. 
+} + +template<typename OP> +void GenerateFP16Round(HInvoke* invoke, + CodeGeneratorARM64* const codegen_, + MacroAssembler* masm, + const OP roundOp) { + DCHECK(codegen_->GetInstructionSetFeatures().HasFP16()); + LocationSummary* locations = invoke->GetLocations(); + UseScratchRegisterScope scratch_scope(masm); + Register out = WRegisterFrom(locations->Out()); + VRegister half = scratch_scope.AcquireH(); + __ Fmov(half, WRegisterFrom(locations->InAt(0))); + roundOp(half, half); + __ Fmov(out, half); + __ Sxth(out, out); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + auto roundOp = [masm](const VRegister& out, const VRegister& in) { + __ Frintm(out, in); // Round towards Minus infinity + }; + GenerateFP16Round(invoke, codegen_, masm, roundOp); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + auto roundOp = [masm](const VRegister& out, const VRegister& in) { + __ Frintp(out, in); // Round towards Plus infinity + }; + GenerateFP16Round(invoke, codegen_, masm, roundOp); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + auto roundOp = [masm](const VRegister& out, const VRegister& in) { + __ Frintn(out, in); // Round to nearest, with ties to even + }; + GenerateFP16Round(invoke, codegen_, masm, roundOp); +} + +template<typename OP> +void GenerateFP16Compare(HInvoke* invoke, + CodeGeneratorARM64* codegen, + MacroAssembler* masm, + const OP compareOp) { + DCHECK(codegen->GetInstructionSetFeatures().HasFP16()); + LocationSummary* locations = invoke->GetLocations(); + Register out = WRegisterFrom(locations->Out()); + VRegister half0 = HRegisterFrom(locations->GetTemp(0)); + VRegister half1 = HRegisterFrom(locations->GetTemp(1)); + __ Fmov(half0, WRegisterFrom(locations->InAt(0))); + __ Fmov(half1, WRegisterFrom(locations->InAt(1))); + compareOp(out, half0, half1); +} + +static inline void GenerateFP16Compare(HInvoke* invoke, + CodeGeneratorARM64* codegen, + MacroAssembler* masm, + vixl::aarch64::Condition cond) { + auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) { + __ Fcmp(in0, in1); + __ Cset(out, cond); + }; + GenerateFP16Compare(invoke, codegen, masm, compareOp); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, gt); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) { + if 
(!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, ge); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, mi); +} + +void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasFP16()) { + return; + } + + CreateIntIntToIntLocations(allocator_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) { + MacroAssembler* masm = GetVIXLAssembler(); + GenerateFP16Compare(invoke, codegen_, masm, ls); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); @@ -3176,7 +3409,16 @@ UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend); UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength); UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat); +UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble); UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength); UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index f0aa92e981..89e5203461 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3070,13 +3070,31 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, 
StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat); +UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc deleted file mode 100644 index 3da0e578bf..0000000000 --- a/compiler/optimizing/intrinsics_mips.cc +++ /dev/null @@ -1,2732 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "intrinsics_mips.h" - -#include "arch/mips/instruction_set_features_mips.h" -#include "art_method.h" -#include "code_generator_mips.h" -#include "entrypoints/quick/quick_entrypoints.h" -#include "heap_poisoning.h" -#include "intrinsics.h" -#include "mirror/array-inl.h" -#include "mirror/object_array-inl.h" -#include "mirror/string.h" -#include "scoped_thread_state_change-inl.h" -#include "thread.h" -#include "utils/mips/assembler_mips.h" -#include "utils/mips/constants_mips.h" - -namespace art { - -namespace mips { - -IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen) - : codegen_(codegen), allocator_(codegen->GetGraph()->GetAllocator()) { -} - -MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() { - return reinterpret_cast<MipsAssembler*>(codegen_->GetAssembler()); -} - -ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { - return codegen_->GetGraph()->GetAllocator(); -} - -inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { - return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); -} - -inline bool IntrinsicCodeGeneratorMIPS::IsR6() const { - return codegen_->GetInstructionSetFeatures().IsR6(); -} - -inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { - return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); -} - -inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { - return codegen_->GetInstructionSetFeatures().HasMsa(); -} - -#define __ codegen->GetAssembler()-> - -static void MoveFromReturnRegister(Location trg, - DataType::Type type, - CodeGeneratorMIPS* codegen) { - if (!trg.IsValid()) { - DCHECK_EQ(type, DataType::Type::kVoid); - return; - } - - DCHECK_NE(type, DataType::Type::kVoid); - - if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { - Register trg_reg = trg.AsRegister<Register>(); - if (trg_reg != V0) { - __ Move(V0, trg_reg); - } - } else { - FRegister trg_reg = trg.AsFpuRegister<FRegister>(); - if (trg_reg != F0) { - if (type == DataType::Type::kFloat32) { - __ MovS(F0, trg_reg); - } else { - __ MovD(F0, trg_reg); - } - } - } -} - -static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS* codegen) { - InvokeDexCallingConventionVisitorMIPS calling_convention_visitor; - IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); -} - -// Slow-path for fallback (calling the managed code to handle the -// intrinsic) in an intrinsified call. This will copy the arguments -// into the positions for a regular call. -// -// Note: The actual parameters are required to be in the locations -// given by the invoke's location summary. If an intrinsic -// modifies those locations before a slowpath call, they must be -// restored! -class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { - public: - explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { } - - void EmitNativeCode(CodeGenerator* codegen_in) override { - CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in); - - __ Bind(GetEntryLabel()); - - SaveLiveRegisters(codegen, invoke_->GetLocations()); - - MoveArguments(invoke_, codegen); - - if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall( - invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); - } else { - codegen->GenerateVirtualCall( - invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); - } - - // Copy the result back to the expected output. 
- Location out = invoke_->GetLocations()->Out(); - if (out.IsValid()) { - DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. - DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - MoveFromReturnRegister(out, invoke_->GetType(), codegen); - } - - RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); - } - - const char* GetDescription() const override { return "IntrinsicSlowPathMIPS"; } - - private: - // The instruction where this slow path is happening. - HInvoke* const invoke_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS); -}; - -#undef __ - -bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { - Dispatch(invoke); - LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); -} - -#define __ assembler-> - -static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - - if (is64bit) { - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - __ Mfc1(out_lo, in); - __ MoveFromFpuHigh(out_hi, in); - } else { - Register out = locations->Out().AsRegister<Register>(); - - __ Mfc1(out, in); - } -} - -// long java.lang.Double.doubleToRawLongBits(double) -void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -// int java.lang.Float.floatToRawIntBits(float) -void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); -} - -static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - - __ Mtc1(in_lo, out); - __ MoveToFpuHigh(in_hi, out); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Mtc1(in, out); - } -} - -// double java.lang.Double.longBitsToDouble(long) -void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -// float java.lang.Float.intBitsToFloat(int) -void 
IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -static void CreateIntToIntLocations(ArenaAllocator* allocator, - HInvoke* invoke, - Location::OutputOverlap overlaps = Location::kNoOutputOverlap) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), overlaps); -} - -static void GenReverse(LocationSummary* locations, - DataType::Type type, - bool isR2OrNewer, - bool isR6, - bool reverseBits, - MipsAssembler* assembler) { - DCHECK(type == DataType::Type::kInt16 || - type == DataType::Type::kInt32 || - type == DataType::Type::kInt64); - DCHECK(type != DataType::Type::kInt16 || !reverseBits); - - if (type == DataType::Type::kInt16) { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (isR2OrNewer) { - __ Wsbh(out, in); - __ Seh(out, out); - } else { - __ Sll(TMP, in, 24); - __ Sra(TMP, TMP, 16); - __ Sll(out, in, 16); - __ Srl(out, out, 24); - __ Or(out, out, TMP); - } - } else if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (isR2OrNewer) { - __ Rotr(out, in, 16); - __ Wsbh(out, out); - } else { - // MIPS32r1 - // __ Rotr(out, in, 16); - __ Sll(TMP, in, 16); - __ Srl(out, in, 16); - __ Or(out, out, TMP); - // __ Wsbh(out, out); - __ LoadConst32(AT, 0x00FF00FF); - __ And(TMP, out, AT); - __ Sll(TMP, TMP, 8); - __ Srl(out, out, 8); - __ And(out, out, AT); - __ Or(out, out, TMP); - } - if (reverseBits) { - if (isR6) { - __ Bitswap(out, out); - } else { - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(TMP, out, AT); - __ Sll(TMP, TMP, 4); - __ Srl(out, out, 4); - __ And(out, out, AT); - __ Or(out, TMP, out); - __ LoadConst32(AT, 0x33333333); - __ And(TMP, out, AT); - __ Sll(TMP, TMP, 2); - __ Srl(out, out, 2); - __ And(out, out, AT); - __ Or(out, TMP, out); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, out, AT); - __ Sll(TMP, TMP, 1); - __ Srl(out, out, 1); - __ And(out, out, AT); - __ Or(out, TMP, out); - } - } - } else if (type == DataType::Type::kInt64) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - if (isR2OrNewer) { - __ Rotr(AT, in_hi, 16); - __ Rotr(TMP, in_lo, 16); - __ Wsbh(out_lo, AT); - __ Wsbh(out_hi, TMP); - } else { - // When calling CreateIntToIntLocations() we promised that the - // use of the out_lo/out_hi wouldn't overlap with the use of - // in_lo/in_hi. Be very careful not to write to out_lo/out_hi - // until we're completely done reading from in_lo/in_hi. - // __ Rotr(TMP, in_lo, 16); - __ Sll(TMP, in_lo, 16); - __ Srl(AT, in_lo, 16); - __ Or(TMP, TMP, AT); // Hold in TMP until it's safe - // to write to out_hi. - // __ Rotr(out_lo, in_hi, 16); - __ Sll(AT, in_hi, 16); - __ Srl(out_lo, in_hi, 16); // Here we are finally done reading - // from in_lo/in_hi so it's okay to - // write to out_lo/out_hi. 
- __ Or(out_lo, out_lo, AT); - // __ Wsbh(out_hi, out_hi); - __ LoadConst32(AT, 0x00FF00FF); - __ And(out_hi, TMP, AT); - __ Sll(out_hi, out_hi, 8); - __ Srl(TMP, TMP, 8); - __ And(TMP, TMP, AT); - __ Or(out_hi, out_hi, TMP); - // __ Wsbh(out_lo, out_lo); - __ And(TMP, out_lo, AT); // AT already holds the correct mask value - __ Sll(TMP, TMP, 8); - __ Srl(out_lo, out_lo, 8); - __ And(out_lo, out_lo, AT); - __ Or(out_lo, out_lo, TMP); - } - if (reverseBits) { - if (isR6) { - __ Bitswap(out_hi, out_hi); - __ Bitswap(out_lo, out_lo); - } else { - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(TMP, out_hi, AT); - __ Sll(TMP, TMP, 4); - __ Srl(out_hi, out_hi, 4); - __ And(out_hi, out_hi, AT); - __ Or(out_hi, TMP, out_hi); - __ And(TMP, out_lo, AT); - __ Sll(TMP, TMP, 4); - __ Srl(out_lo, out_lo, 4); - __ And(out_lo, out_lo, AT); - __ Or(out_lo, TMP, out_lo); - __ LoadConst32(AT, 0x33333333); - __ And(TMP, out_hi, AT); - __ Sll(TMP, TMP, 2); - __ Srl(out_hi, out_hi, 2); - __ And(out_hi, out_hi, AT); - __ Or(out_hi, TMP, out_hi); - __ And(TMP, out_lo, AT); - __ Sll(TMP, TMP, 2); - __ Srl(out_lo, out_lo, 2); - __ And(out_lo, out_lo, AT); - __ Or(out_lo, TMP, out_lo); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, out_hi, AT); - __ Sll(TMP, TMP, 1); - __ Srl(out_hi, out_hi, 1); - __ And(out_hi, out_hi, AT); - __ Or(out_hi, TMP, out_hi); - __ And(TMP, out_lo, AT); - __ Sll(TMP, TMP, 1); - __ Srl(out_lo, out_lo, 1); - __ And(out_lo, out_lo, AT); - __ Or(out_lo, TMP, out_lo); - } - } - } -} - -// int java.lang.Integer.reverseBytes(int) -void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), - DataType::Type::kInt32, - IsR2OrNewer(), - IsR6(), - /* reverseBits= */ false, - GetAssembler()); -} - -// long java.lang.Long.reverseBytes(long) -void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), - DataType::Type::kInt64, - IsR2OrNewer(), - IsR6(), - /* reverseBits= */ false, - GetAssembler()); -} - -// short java.lang.Short.reverseBytes(short) -void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), - DataType::Type::kInt16, - IsR2OrNewer(), - IsR6(), - /* reverseBits= */ false, - GetAssembler()); -} - -static void GenNumberOfLeadingZeroes(LocationSummary* locations, - bool is64bit, - bool isR6, - MipsAssembler* assembler) { - Register out = locations->Out().AsRegister<Register>(); - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - - if (isR6) { - __ ClzR6(AT, in_hi); - __ ClzR6(TMP, in_lo); - __ Seleqz(TMP, TMP, in_hi); - } else { - __ ClzR2(AT, in_hi); - __ ClzR2(TMP, in_lo); - __ Movn(TMP, ZERO, in_hi); - } - __ Addu(out, AT, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - - if (isR6) { - __ ClzR6(out, in); - } else { - __ ClzR2(out, in); - } - } -} - -// int java.lang.Integer.numberOfLeadingZeros(int i) -void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - 
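The pre-R6 fallbacks in GenReverse above build byte and bit reversal out of the classic mask-and-shift swaps: halfwords, then bytes, then nibbles, bit pairs and single bits, using the 0x0F0F0F0F / 0x33333333 / 0x55555555 masks. A compact C++ sketch of the 32-bit case with the same steps (illustrative only, not ART code):

#include <cstdint>

// Each step swaps fields of one width; together they reverse the word.
uint32_t ReverseBits32(uint32_t v) {
  v = (v >> 16) | (v << 16);                                // swap halfwords (Rotr 16)
  v = ((v & 0x00FF00FFu) << 8) | ((v >> 8) & 0x00FF00FFu);  // swap bytes (Wsbh)
  v = ((v & 0x0F0F0F0Fu) << 4) | ((v >> 4) & 0x0F0F0F0Fu);  // swap nibbles
  v = ((v & 0x33333333u) << 2) | ((v >> 2) & 0x33333333u);  // swap bit pairs
  v = ((v & 0x55555555u) << 1) | ((v >> 1) & 0x55555555u);  // swap adjacent bits
  return v;
}

Stopping after the first two steps yields Integer.reverseBytes, which is why the reverseBits flag above only adds the last three swaps.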
CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); -} - -// int java.lang.Long.numberOfLeadingZeros(long i) -void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); -} - -static void GenNumberOfTrailingZeroes(LocationSummary* locations, - bool is64bit, - bool isR6, - MipsAssembler* assembler) { - Register out = locations->Out().AsRegister<Register>(); - Register in_lo; - Register in; - - if (is64bit) { - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - - in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - - // If in_lo is zero then count the number of trailing zeroes in in_hi; - // otherwise count the number of trailing zeroes in in_lo. - // out = in_lo ? in_lo : in_hi; - if (isR6) { - __ Seleqz(out, in_hi, in_lo); - __ Selnez(TMP, in_lo, in_lo); - __ Or(out, out, TMP); - } else { - __ Movz(out, in_hi, in_lo); - __ Movn(out, in_lo, in_lo); - } - - in = out; - } else { - in = locations->InAt(0).AsRegister<Register>(); - // Give in_lo a dummy value to keep the compiler from complaining. - // Since we only get here in the 32-bit case, this value will never - // be used. - in_lo = in; - } - - if (isR6) { - // We don't have an instruction to count the number of trailing zeroes. - // Start by flipping the bits end-for-end so we can count the number of - // leading zeroes instead. - __ Rotr(out, in, 16); - __ Wsbh(out, out); - __ Bitswap(out, out); - __ ClzR6(out, out); - } else { - // Convert trailing zeroes to trailing ones, and bits to their left - // to zeroes. - __ Addiu(TMP, in, -1); - __ Xor(out, TMP, in); - __ And(out, out, TMP); - // Count number of leading zeroes. - __ ClzR2(out, out); - // Subtract number of leading zeroes from 32 to get number of trailing ones. - // Remember that the trailing ones were formerly trailing zeroes. 
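Pre-R6 MIPS has no count-trailing-zeros instruction, so the sequence above first turns the trailing zeros of the input into a block of trailing ones ((x ^ (x - 1)) & (x - 1)), counts leading zeros of that mask, and the subtraction from 32 that follows finishes the job. A self-contained C++ sketch of the same trick (illustrative only, not ART code; __builtin_clz is assumed to be available, as on GCC/Clang):

#include <cstdint>

// Trailing-zero count built from a leading-zero count. x == 0 yields 32,
// matching Integer.numberOfTrailingZeros(0).
int NumberOfTrailingZeros32(uint32_t x) {
  uint32_t mask = (x ^ (x - 1u)) & (x - 1u);             // trailing zeros -> trailing ones
  int leading = (mask == 0) ? 32 : __builtin_clz(mask);  // builtin clz(0) is undefined
  return 32 - leading;
}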
- __ LoadConst32(TMP, 32); - __ Subu(out, TMP, out); - } - - if (is64bit) { - // If in_lo is zero, then we counted the number of trailing zeroes in in_hi so we must add the - // number of trailing zeroes in in_lo (32) to get the correct final count - __ LoadConst32(TMP, 32); - if (isR6) { - __ Seleqz(TMP, TMP, in_lo); - } else { - __ Movn(TMP, ZERO, in_lo); - } - __ Addu(out, out, TMP); - } -} - -// int java.lang.Integer.numberOfTrailingZeros(int i) -void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); -} - -// int java.lang.Long.numberOfTrailingZeros(long i) -void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); -} - -// int java.lang.Integer.reverse(int) -void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), - DataType::Type::kInt32, - IsR2OrNewer(), - IsR6(), - /* reverseBits= */ true, - GetAssembler()); -} - -// long java.lang.Long.reverse(long) -void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), - DataType::Type::kInt64, - IsR2OrNewer(), - IsR6(), - /* reverseBits= */ true, - GetAssembler()); -} - -static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -static void GenBitCount(LocationSummary* locations, - DataType::Type type, - bool isR6, - bool hasMsa, - MipsAssembler* assembler) { - Register out = locations->Out().AsRegister<Register>(); - - // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - // - // A generalization of the best bit counting method to integers of - // bit-widths up to 128 (parameterized by type T) is this: - // - // v = v - ((v >> 1) & (T)~(T)0/3); // temp - // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp - // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp - // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count - // - // For comparison, for 32-bit quantities, this algorithm can be executed - // using 20 MIPS instructions (the calls to LoadConst32() generate two - // machine instructions each for the values being used in this algorithm). - // A(n unrolled) loop-based algorithm required 25 instructions. - // - // For 64-bit quantities, this algorithm gets executed twice, (once - // for in_lo, and again for in_hi), but saves a few instructions - // because the mask values only have to be loaded once. 
Using this - // algorithm the count for a 64-bit operand can be performed in 29 - // instructions compared to a loop-based algorithm which required 47 - // instructions. - - if (hasMsa) { - if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - __ Mtc1(in, FTMP); - __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); - __ Mfc1(out, FTMP); - } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - __ Mtc1(in_lo, FTMP); - __ Mthc1(in_hi, FTMP); - __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); - __ Mfc1(out, FTMP); - } - } else { - if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); - } else { - __ MulR2(out, out, TMP); - } - __ Srl(out, out, 24); - } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; - - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); - - __ LoadConst32(AT, 0x55555555); - - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); - - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); - - __ LoadConst32(AT, 0x33333333); - - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); - - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); - - // Here we deviate from the original algorithm a bit. We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. 
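The scalar fallback above is the parallel ("SWAR") population count from the referenced bit-twiddling page: add bits in pairs, then nibbles, then bytes, and finally let a multiply by 0x01010101 accumulate the four byte counts into the top byte. The 32-bit step in plain C++ (illustrative only, not ART code); the 64-bit path that follows adds the two words' partial sums before this shared tail, exactly as the comment above describes:

#include <cstdint>

int BitCount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                  // 2-bit partial sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // 4-bit partial sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
  return static_cast<int>((v * 0x01010101u) >> 24);  // add the four byte counts
}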
- __ LoadConst32(AT, 0x0F0F0F0F); - - __ Addu(TMP, tmp_hi, tmp_lo); - - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); - - __ LoadConst32(AT, 0x01010101); - - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } - - __ Srl(out, out, 24); - } - } -} - -// int java.lang.Integer.bitCount(int) -void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); -} - -// int java.lang.Long.bitCount(int) -void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); -} - -// double java.lang.Math.sqrt(double) -void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - MipsAssembler* assembler = GetAssembler(); - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - __ SqrtD(out, in); -} - -// byte libcore.io.Memory.peekByte(long address) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register out = invoke->GetLocations()->Out().AsRegister<Register>(); - - __ Lb(out, adr, 0); -} - -// short libcore.io.Memory.peekShort(long address) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register out = invoke->GetLocations()->Out().AsRegister<Register>(); - - if (IsR6()) { - __ Lh(out, adr, 0); - } else if (IsR2OrNewer()) { - // Unlike for words, there are no lhl/lhr instructions to load - // unaligned halfwords so the code loads individual bytes, in case - // the address isn't halfword-aligned, and assembles them into a - // signed halfword. - __ Lb(AT, adr, 1); // This byte must be sign-extended. - __ Lb(out, adr, 0); // This byte can be either sign-extended, or - // zero-extended because the following - // instruction overwrites the sign bits. - __ Ins(out, AT, 8, 24); - } else { - __ Lbu(AT, adr, 0); // This byte must be zero-extended. If it's not - // the "or" instruction below will destroy the upper - // 24 bits of the final result. - __ Lb(out, adr, 1); // This byte must be sign-extended. 
- __ Sll(out, out, 8); - __ Or(out, out, AT); - } -} - -// int libcore.io.Memory.peekInt(long address) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register out = invoke->GetLocations()->Out().AsRegister<Register>(); - - if (IsR6()) { - __ Lw(out, adr, 0); - } else { - __ Lwr(out, adr, 0); - __ Lwl(out, adr, 3); - } -} - -// long libcore.io.Memory.peekLong(long address) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register out_lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>(); - Register out_hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>(); - - if (IsR6()) { - __ Lw(out_lo, adr, 0); - __ Lw(out_hi, adr, 4); - } else { - __ Lwr(out_lo, adr, 0); - __ Lwl(out_lo, adr, 3); - __ Lwr(out_hi, adr, 4); - __ Lwl(out_hi, adr, 7); - } -} - -static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); -} - -// void libcore.io.Memory.pokeByte(long address, byte value) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>(); - - __ Sb(val, adr, 0); -} - -// void libcore.io.Memory.pokeShort(long address, short value) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>(); - - if (IsR6()) { - __ Sh(val, adr, 0); - } else { - // Unlike for words, there are no shl/shr instructions to store - // unaligned halfwords so the code stores individual bytes, in case - // the address isn't halfword-aligned. 
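For pre-R6 cores the peek/poke intrinsics above cannot assume an aligned address: words use the lwr/lwl (and swr/swl) pairs, while halfwords are loaded and stored one byte at a time. A portable C++ sketch of the halfword load, mirroring the byte roles in the MIPS32r1 sequence and assuming the little-endian layout used on Android (illustrative only, not ART code):

#include <cstdint>

// The low byte is zero-extended (Lbu) and the high byte sign-extended (Lb),
// then they are combined as in the Sll/Or pair above.
int32_t PeekShortUnaligned(const uint8_t* address) {
  int32_t low = address[0];                        // zero-extended
  int32_t high = static_cast<int8_t>(address[1]);  // sign-extended
  return high * 256 + low;                         // same result as (high << 8) | low
}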
- __ Sb(val, adr, 0); - __ Srl(AT, val, 8); - __ Sb(AT, adr, 1); - } -} - -// void libcore.io.Memory.pokeInt(long address, int value) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register val = invoke->GetLocations()->InAt(1).AsRegister<Register>(); - - if (IsR6()) { - __ Sw(val, adr, 0); - } else { - __ Swr(val, adr, 0); - __ Swl(val, adr, 3); - } -} - -// void libcore.io.Memory.pokeLong(long address, long value) -void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register adr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); - Register val_lo = invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(); - Register val_hi = invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(); - - if (IsR6()) { - __ Sw(val_lo, adr, 0); - __ Sw(val_hi, adr, 4); - } else { - __ Swr(val_lo, adr, 0); - __ Swl(val_lo, adr, 3); - __ Swr(val_hi, adr, 4); - __ Swl(val_hi, adr, 7); - } -} - -// Thread java.lang.Thread.currentThread() -void IntrinsicLocationsBuilderMIPS::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register out = invoke->GetLocations()->Out().AsRegister<Register>(); - - __ LoadFromOffset(kLoadWord, - out, - TR, - Thread::PeerOffset<kMipsPointerSize>().Int32Value()); -} - -static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, - HInvoke* invoke, - DataType::Type type) { - bool can_call = kEmitCompilerReadBarrier && - (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || - invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = - new (allocator) LocationSummary(invoke, - can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); - if (can_call && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), - (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); - } -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). 
-static void GenUnsafeGet(HInvoke* invoke, - DataType::Type type, - bool is_volatile, - bool is_R6, - CodeGeneratorMIPS* codegen) { - LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == DataType::Type::kInt32) || - (type == DataType::Type::kInt64) || - (type == DataType::Type::kReference)) << type; - MipsAssembler* assembler = codegen->GetAssembler(); - // Target register. - Location trg_loc = locations->Out(); - // Object pointer. - Location base_loc = locations->InAt(1); - Register base = base_loc.AsRegister<Register>(); - // The "offset" argument is passed as a "long". Since this code is for - // a 32-bit processor, we can only use 32-bit addresses, so we only - // need the low 32-bits of offset. - Location offset_loc = locations->InAt(2); - Register offset_lo = offset_loc.AsRegisterPairLow<Register>(); - - if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == DataType::Type::kReference))) { - __ Addu(TMP, base, offset_lo); - } - - switch (type) { - case DataType::Type::kInt64: { - Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); - Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); - CHECK(!is_volatile); // TODO: support atomic 8-byte volatile loads. - if (is_R6) { - __ Lw(trg_lo, TMP, 0); - __ Lw(trg_hi, TMP, 4); - } else { - __ Lwr(trg_lo, TMP, 0); - __ Lwl(trg_lo, TMP, 3); - __ Lwr(trg_hi, TMP, 4); - __ Lwl(trg_hi, TMP, 7); - } - break; - } - - case DataType::Type::kInt32: { - Register trg = trg_loc.AsRegister<Register>(); - if (is_R6) { - __ Lw(trg, TMP, 0); - } else { - __ Lwr(trg, TMP, 0); - __ Lwl(trg, TMP, 3); - } - if (is_volatile) { - __ Sync(0); - } - break; - } - - case DataType::Type::kReference: { - Register trg = trg_loc.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { - if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); - codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, - trg_loc, - base, - /* offset= */ 0U, - /* index= */ offset_loc, - TIMES_1, - temp, - /* needs_null_check= */ false); - if (is_volatile) { - __ Sync(0); - } - } else { - if (is_R6) { - __ Lw(trg, TMP, 0); - } else { - __ Lwr(trg, TMP, 0); - __ Lwl(trg, TMP, 3); - } - if (is_volatile) { - __ Sync(0); - } - codegen->GenerateReadBarrierSlow(invoke, - trg_loc, - trg_loc, - base_loc, - /* offset= */ 0U, - /* index= */ offset_loc); - } - } else { - if (is_R6) { - __ Lw(trg, TMP, 0); - } else { - __ Lwr(trg, TMP, 0); - __ Lwl(trg, TMP, 3); - } - if (is_volatile) { - __ Sync(0); - } - __ MaybeUnpoisonHeapReference(trg); - } - break; - } - - default: - LOG(FATAL) << "Unexpected type " << type; - UNREACHABLE(); - } -} - -// int sun.misc.Unsafe.getInt(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, IsR6(), codegen_); -} - -// int sun.misc.Unsafe.getIntVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, IsR6(), codegen_); -} - -// long sun.misc.Unsafe.getLong(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - 
CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, IsR6(), codegen_); -} - -// Object sun.misc.Unsafe.getObject(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, IsR6(), codegen_); -} - -// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, IsR6(), codegen_); -} - -static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenUnsafePut(LocationSummary* locations, - DataType::Type type, - bool is_volatile, - bool is_ordered, - bool is_R6, - CodeGeneratorMIPS* codegen) { - DCHECK((type == DataType::Type::kInt32) || - (type == DataType::Type::kInt64) || - (type == DataType::Type::kReference)) << type; - MipsAssembler* assembler = codegen->GetAssembler(); - // Object pointer. - Register base = locations->InAt(1).AsRegister<Register>(); - // The "offset" argument is passed as a "long", i.e., it's 64-bits in - // size. Since this code is for a 32-bit processor, we can only use - // 32-bit addresses, so we only need the low 32-bits of offset. - Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>(); - - __ Addu(TMP, base, offset_lo); - if (is_volatile || is_ordered) { - __ Sync(0); - } - if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { - Register value = locations->InAt(3).AsRegister<Register>(); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ PoisonHeapReference(AT, value); - value = AT; - } - - if (is_R6) { - __ Sw(value, TMP, 0); - } else { - __ Swr(value, TMP, 0); - __ Swl(value, TMP, 3); - } - } else { - Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); - Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); - CHECK(!is_volatile); // TODO: support atomic 8-byte volatile stores. - if (is_R6) { - __ Sw(value_lo, TMP, 0); - __ Sw(value_hi, TMP, 4); - } else { - __ Swr(value_lo, TMP, 0); - __ Swl(value_lo, TMP, 3); - __ Swr(value_hi, TMP, 4); - __ Swl(value_hi, TMP, 7); - } - } - - if (is_volatile) { - __ Sync(0); - } - - if (type == DataType::Type::kReference) { - bool value_can_be_null = true; // TODO: Worth finding out this information? 
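In GenUnsafeGet and GenUnsafePut above, volatility is enforced with full SYNC barriers: a volatile store is bracketed by a sync before and after it (ordered stores only get the leading sync), and a volatile load is followed by one. A rough C++ analogue of that barrier placement, expressed with seq_cst fences (an assumed mapping for illustration, not ART code):

#include <atomic>
#include <cstdint>

// Volatile put: fence, plain store, fence.
void UnsafePutIntVolatileSketch(std::atomic<int32_t>* field, int32_t value) {
  std::atomic_thread_fence(std::memory_order_seq_cst);  // sync before the store
  field->store(value, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // sync after the store
}

// Volatile get: plain load followed by a fence.
int32_t UnsafeGetIntVolatileSketch(const std::atomic<int32_t>* field) {
  int32_t value = field->load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // sync after the load
  return value;
}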
- codegen->MarkGCCard(base, locations->InAt(3).AsRegister<Register>(), value_can_be_null); - } -} - -// void sun.misc.Unsafe.putInt(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ false, - /* is_ordered= */ false, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ false, - /* is_ordered= */ true, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ true, - /* is_ordered= */ false, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putObject(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ false, - /* is_ordered= */ false, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ false, - /* is_ordered= */ true, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ true, - /* is_ordered= */ false, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putLong(Object o, long offset, long x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt64, - /* is_volatile= */ false, - /* is_ordered= */ false, - IsR6(), - codegen_); -} - -// void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt64, - /* is_volatile= */ false, - /* is_ordered= */ true, - 
IsR6(), - codegen_); -} - -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && - kUseBakerReadBarrier && - (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = - new (allocator) LocationSummary(invoke, - can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); - locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - - // Temporary register used in CAS by (Baker) read barrier. - if (can_call) { - locations->AddTemp(Location::RequiresRegister()); - } -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS* codegen) { - MipsAssembler* assembler = codegen->GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - bool isR6 = codegen->GetInstructionSetFeatures().IsR6(); - Register base = locations->InAt(1).AsRegister<Register>(); - Location offset_loc = locations->InAt(2); - Register offset_lo = offset_loc.AsRegisterPairLow<Register>(); - Register expected = locations->InAt(3).AsRegister<Register>(); - Register value = locations->InAt(4).AsRegister<Register>(); - Location out_loc = locations->Out(); - Register out = out_loc.AsRegister<Register>(); - - DCHECK_NE(base, out); - DCHECK_NE(offset_lo, out); - DCHECK_NE(expected, out); - - if (type == DataType::Type::kReference) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - // Mark card for object assuming new value is stored. Worst case we will mark an unchanged - // object and scan the receiver at the next GC for nothing. - bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(base, value, value_can_be_null); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* offset= */ 0u, - /* index= */ offset_loc, - ScaleFactor::TIMES_1, - temp, - /* needs_null_check= */ false, - /* always_update_field= */ true); - } - } - - MipsLabel loop_head, exit_loop; - __ Addu(TMP, base, offset_lo); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ PoisonHeapReference(expected); - // Do not poison `value`, if it is the same register as - // `expected`, which has just been poisoned. 
- if (value != expected) { - __ PoisonHeapReference(value); - } - } - - // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - // result = tmp_value != 0; - - __ Sync(0); - __ Bind(&loop_head); - if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { - if (isR6) { - __ LlR6(out, TMP); - } else { - __ LlR2(out, TMP); - } - } else { - LOG(FATAL) << "Unsupported op size " << type; - UNREACHABLE(); - } - __ Subu(out, out, expected); // If we didn't get the 'expected' - __ Sltiu(out, out, 1); // value, set 'out' to false, and - __ Beqz(out, &exit_loop); // return. - __ Move(out, value); // Use 'out' for the 'store conditional' instruction. - // If we use 'value' directly, we would lose 'value' - // in the case that the store fails. Whether the - // store succeeds, or fails, it will load the - // correct Boolean value into the 'out' register. - // This test isn't really necessary. We only support DataType::Type::kInt, - // DataType::Type::kReference, and we already verified that we're working on one - // of those two types. It's left here in case the code needs to support - // other types in the future. - if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { - if (isR6) { - __ ScR6(out, TMP); - } else { - __ ScR2(out, TMP); - } - } - __ Beqz(out, &loop_head); // If we couldn't do the read-modify-write - // cycle atomically then retry. - __ Bind(&exit_loop); - __ Sync(0); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ UnpoisonHeapReference(expected); - // Do not unpoison `value`, if it is the same register as - // `expected`, which has just been unpoisoned. - if (value != expected) { - __ UnpoisonHeapReference(value); - } - } -} - -// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, DataType::Type::kInt32, codegen_); -} - -// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { - return; - } - - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. 
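The ll/sc retry loop above implements compareAndSwap: reload the field, return false as soon as the current value differs from the expected one, otherwise attempt the conditional store and retry if the reservation was lost, with SYNC barriers on both sides of the loop. Its observable behaviour is that of a sequentially consistent strong compare-exchange; a one-function C++ sketch (illustrative only, not ART code):

#include <atomic>
#include <cstdint>

// Returns true only if *field still held `expected` and was atomically
// replaced by `value`.
bool CompareAndSwapInt32(std::atomic<int32_t>* field, int32_t expected, int32_t value) {
  return field->compare_exchange_strong(expected, value, std::memory_order_seq_cst);
}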
- DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - GenCas(invoke, DataType::Type::kReference, codegen_); -} - -// int java.lang.String.compareTo(String anotherString) -void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - Register argument = locations->InAt(1).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqz(argument, slow_path->GetEntryLabel()); - codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); - __ Bind(slow_path->GetExitLabel()); -} - -// boolean java.lang.String.equals(Object anObject) -void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - - // Temporary registers to store lengths of strings and for calculations. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register str = locations->InAt(0).AsRegister<Register>(); - Register arg = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); - - MipsLabel loop; - MipsLabel end; - MipsLabel return_true; - MipsLabel return_false; - - // Get offsets of count, value, and class fields within a string object. - const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); - const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - // If the register containing the pointer to "this", and the register - // containing the pointer to "anObject" are the same register then - // "this", and "anObject" are the same object and we can - // short-circuit the logic to a true result. 
- if (str == arg) { - __ LoadConst32(out, 1); - return; - } - StringEqualsOptimizations optimizations(invoke); - if (!optimizations.GetArgumentNotNull()) { - // Check if input is null, return false if it is. - __ Beqz(arg, &return_false); - } - - // Reference equality check, return true if same reference. - __ Beq(str, arg, &return_true); - - if (!optimizations.GetArgumentIsString()) { - // Instanceof check for the argument by comparing class fields. - // All string objects must have the same type since String cannot be subclassed. - // Receiver must be a string object, so its class field is equal to all strings' class fields. - // If the argument is a string object, its class field must be equal to receiver's class field. - // - // As the String class is expected to be non-movable, we can read the class - // field from String.equals' arguments without read barriers. - AssertNonMovableStringClass(); - // /* HeapReference<Class> */ temp1 = str->klass_ - __ Lw(temp1, str, class_offset); - // /* HeapReference<Class> */ temp2 = arg->klass_ - __ Lw(temp2, arg, class_offset); - // Also, because we use the previously loaded class references only in the - // following comparison, we don't need to unpoison them. - __ Bne(temp1, temp2, &return_false); - } - - // Load `count` fields of this and argument strings. - __ Lw(temp1, str, count_offset); - __ Lw(temp2, arg, count_offset); - // Check if `count` fields are equal, return false if they're not. - // Also compares the compression style, if differs return false. - __ Bne(temp1, temp2, &return_false); - // Return true if both strings are empty. Even with string compression `count == 0` means empty. - static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - __ Beqz(temp1, &return_true); - - // Don't overwrite input registers - __ Move(TMP, str); - __ Move(temp3, arg); - - // Assertions that must hold in order to compare strings 4 bytes at a time. - DCHECK_ALIGNED(value_offset, 4); - static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); - - // For string compression, calculate the number of bytes to compare (not chars). - if (mirror::kUseStringCompression) { - // Extract compression flag. - if (IsR2OrNewer()) { - __ Ext(temp2, temp1, 0, 1); - } else { - __ Sll(temp2, temp1, 31); - __ Srl(temp2, temp2, 31); - } - __ Srl(temp1, temp1, 1); // Extract length. - __ Sllv(temp1, temp1, temp2); // Double the byte count if uncompressed. - } - - // Loop to compare strings 4 bytes at a time starting at the beginning of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. - __ Bind(&loop); - __ Lw(out, TMP, value_offset); - __ Lw(temp2, temp3, value_offset); - __ Bne(out, temp2, &return_false); - __ Addiu(TMP, TMP, 4); - __ Addiu(temp3, temp3, 4); - // With string compression, we have compared 4 bytes, otherwise 2 chars. - __ Addiu(temp1, temp1, mirror::kUseStringCompression ? -4 : -2); - __ Bgtz(temp1, &loop); - - // Return true and exit the function. - // If loop does not result in returning false, we return true. - __ Bind(&return_true); - __ LoadConst32(out, 1); - __ B(&end); - - // Return false and exit the function. - __ Bind(&return_false); - __ LoadConst32(out, 0); - __ Bind(&end); -} - -static void GenerateStringIndexOf(HInvoke* invoke, - bool start_at_zero, - MipsAssembler* assembler, - CodeGeneratorMIPS* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Register tmp_reg = start_at_zero ? 
locations->GetTemp(0).AsRegister<Register>() : TMP; - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, - // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. - SlowPathCodeMIPS* slow_path = nullptr; - HInstruction* code_point = invoke->InputAt(1); - if (code_point->IsIntConstant()) { - if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) { - // Always needs the slow-path. We could directly dispatch to it, - // but this case should be rare, so for simplicity just put the - // full slow-path down and branch unconditionally. - slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - return; - } - } else if (code_point->GetType() != DataType::Type::kUint16) { - Register char_reg = locations->InAt(1).AsRegister<Register>(); - // The "bltu" conditional branch tests to see if the character value - // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then - // the character being searched for, if it exists in the string, is - // encoded using UTF-16 and stored in the string as two (16-bit) - // halfwords. Currently the assembly code used to implement this - // intrinsic doesn't support searching for a character stored as - // two halfwords so we fallback to using the generic implementation - // of indexOf(). - __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max()); - slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen->AddSlowPath(slow_path); - __ Bltu(tmp_reg, char_reg, slow_path->GetEntryLabel()); - } - - if (start_at_zero) { - DCHECK_EQ(tmp_reg, A2); - // Start-index = 0. - __ Clear(tmp_reg); - } - - codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } -} - -// int java.lang.String.indexOf(int ch) -void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - // We have a hand-crafted assembly stub that follows the runtime - // calling convention. So it's best to align the inputs accordingly. - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); - - // Need a temp for slow-path codepoint compare, and need to send start-index=0. 
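GenerateStringIndexOf above only searches for characters that fit in a single UTF-16 code unit; larger code points are routed to the generic indexOf because they are stored as a surrogate pair, which the hand-written loop cannot match. The guard in plain C++ (illustrative only, not ART code):

#include <cstdint>

// Code points above 0xFFFF (for example U+1F600) occupy two UTF-16 code
// units, so the intrinsic falls back to the generic implementation.
bool NeedsGenericIndexOf(uint32_t code_point) {
  return code_point > 0xFFFFu;
}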
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero= */ true, GetAssembler(), codegen_); -} - -// int java.lang.String.indexOf(int ch, int fromIndex) -void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - // We have a hand-crafted assembly stub that follows the runtime - // calling convention. So it's best to align the inputs accordingly. - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); - - // Need a temp for slow-path codepoint compare. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero= */ false, GetAssembler(), codegen_); -} - -// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) -void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register byte_array = locations->InAt(0).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqz(byte_array, slow_path->GetEntryLabel()); - codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); - __ Bind(slow_path->GetExitLabel()); -} - -// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) -void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location 
outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) { - // No need to emit code checking whether `locations->InAt(2)` is a null - // pointer, as callers of the native method - // - // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) - // - // all include a null check on `data` before calling that method. - codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); -} - -// java.lang.StringFactory.newStringFromString(String toCopy) -void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register string_to_copy = locations->InAt(0).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqz(string_to_copy, slow_path->GetEntryLabel()); - codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); - __ Bind(slow_path->GetExitLabel()); -} - -static void GenIsInfinite(LocationSummary* locations, - const DataType::Type type, - const bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - Register out = locations->Out().AsRegister<Register>(); - - DCHECK(type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64); - - if (isR6) { - if (type == DataType::Type::kFloat64) { - __ ClassD(FTMP, in); - } else { - __ ClassS(FTMP, in); - } - __ Mfc1(out, FTMP); - __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); - __ Sltu(out, ZERO, out); - } else { - // If one, or more, of the exponent bits is zero, then the number can't be infinite. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, in); - __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble)); - } else { - __ Mfc1(TMP, in); - __ LoadConst32(AT, kPositiveInfinityFloat); - } - __ Xor(TMP, TMP, AT); - - __ Sll(TMP, TMP, 1); - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, in); - __ Or(TMP, TMP, AT); - } - // If any of the significand bits are one, then the number is not infinite. 
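Without the R6 CLASS.fmt instruction, the sequence above detects infinity by XOR-ing the bits against the positive-infinity pattern and shifting out the sign bit, so the Sltiu that follows simply asks whether everything except the sign matched. In portable C++ (illustrative only, not ART code):

#include <cstdint>
#include <cstring>

// A float is infinite iff its bits, ignoring the sign, equal the
// positive-infinity pattern: exponent all ones, significand all zeros.
bool IsInfiniteFloat(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return (bits & 0x7FFFFFFFu) == 0x7F800000u;
}

The double case is analogous with pattern 0x7FF0000000000000, which is why the low word of the double is OR-ed into the test above.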
- __ Sltiu(out, TMP, 1); - } -} - -// boolean java.lang.Float.isInfinite(float) -void IntrinsicLocationsBuilderMIPS::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), DataType::Type::kFloat32, IsR6(), GetAssembler()); -} - -// boolean java.lang.Double.isInfinite(double) -void IntrinsicLocationsBuilderMIPS::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), DataType::Type::kFloat64, IsR6(), GetAssembler()); -} - -static void GenHighestOneBit(LocationSummary* locations, - const DataType::Type type, - bool isR6, - MipsAssembler* assembler) { - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - - if (type == DataType::Type::kInt64) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - if (isR6) { - __ ClzR6(TMP, in_hi); - } else { - __ ClzR2(TMP, in_hi); - } - __ LoadConst32(AT, 0x80000000); - __ Srlv(out_hi, AT, TMP); - __ And(out_hi, out_hi, in_hi); - if (isR6) { - __ ClzR6(TMP, in_lo); - } else { - __ ClzR2(TMP, in_lo); - } - __ Srlv(out_lo, AT, TMP); - __ And(out_lo, out_lo, in_lo); - if (isR6) { - __ Seleqz(out_lo, out_lo, out_hi); - } else { - __ Movn(out_lo, ZERO, out_hi); - } - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (isR6) { - __ ClzR6(TMP, in); - } else { - __ ClzR2(TMP, in); - } - __ LoadConst32(AT, 0x80000000); - __ Srlv(AT, AT, TMP); // Srlv shifts in the range of [0;31] bits (lower 5 bits of arg). - __ And(out, AT, in); // So this is required for 0 (=shift by 32). 
- } -} - -// int java.lang.Integer.highestOneBit(int) -void IntrinsicLocationsBuilderMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); -} - -// long java.lang.Long.highestOneBit(long) -void IntrinsicLocationsBuilderMIPS::VisitLongHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void GenLowestOneBit(LocationSummary* locations, - const DataType::Type type, - bool isR6, - MipsAssembler* assembler) { - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - - if (type == DataType::Type::kInt64) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - __ Subu(TMP, ZERO, in_lo); - __ And(out_lo, TMP, in_lo); - __ Subu(TMP, ZERO, in_hi); - __ And(out_hi, TMP, in_hi); - if (isR6) { - __ Seleqz(out_hi, out_hi, out_lo); - } else { - __ Movn(out_hi, ZERO, out_lo); - } - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Subu(TMP, ZERO, in); - __ And(out, TMP, in); - } -} - -// int java.lang.Integer.lowestOneBit(int) -void IntrinsicLocationsBuilderMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); -} - -// long java.lang.Long.lowestOneBit(long) -void IntrinsicLocationsBuilderMIPS::VisitLongLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitLongLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -// int java.lang.Math.round(float) -void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - MipsAssembler* assembler = GetAssembler(); - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister half = locations->GetTemp(0).AsFpuRegister<FRegister>(); - Register out = locations->Out().AsRegister<Register>(); - - MipsLabel done; - - if (IsR6()) { - // out = floor(in); - // - // if (out != MAX_VALUE && out != MIN_VALUE) { - // TMP = ((in - out) >= 0.5) ? 1 : 0; - // return out += TMP; - // } - // return out; - - // out = floor(in); - __ FloorWS(FTMP, in); - __ Mfc1(out, FTMP); - - // if (out != MAX_VALUE && out != MIN_VALUE) - __ Addiu(TMP, out, 1); - __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001 - // or out - 0x7FFF FFFF. 
- // IOW, TMP = 1 if out = Int.MIN_VALUE - // or TMP = 0 if out = Int.MAX_VALUE. - __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE - // or out = Int.MAX_VALUE. - __ Beqz(TMP, &done); - - // TMP = (0.5f <= (in - out)) ? -1 : 0; - __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". - __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); - __ SubS(FTMP, in, FTMP); - __ Mtc1(AT, half); - - __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(TMP, FTMP); - - // Return out -= TMP. - __ Subu(out, out, TMP); - } else { - // if (in.isNaN) { - // return 0; - // } - // - // out = floor.w.s(in); - // - // /* - // * This "if" statement is only needed for the pre-R6 version of floor.w.s - // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes - // * too large to fit in a 32-bit integer. - // */ - // if (out == Integer.MAX_VALUE) { - // TMP = (in < 0.0f) ? 1 : 0; - // /* - // * If TMP is 1, then adding it to out will wrap its value from - // * Integer.MAX_VALUE to Integer.MIN_VALUE. - // */ - // return out += TMP; - // } - // - // /* - // * For negative values not handled by the previous "if" statement the - // * test here will correctly set the value of TMP. - // */ - // TMP = ((in - out) >= 0.5f) ? 1 : 0; - // return out += TMP; - - MipsLabel finite; - MipsLabel add; - - // Test for NaN. - __ CunS(in, in); - - // Return zero for NaN. - __ Move(out, ZERO); - __ Bc1t(&done); - - // out = floor(in); - __ FloorWS(FTMP, in); - __ Mfc1(out, FTMP); - - __ LoadConst32(TMP, -1); - - // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0; - __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); - __ Bne(AT, out, &finite); - - __ Mtc1(ZERO, FTMP); - __ ColtS(in, FTMP); - - __ B(&add); - - __ Bind(&finite); - - // TMP = (0.5f <= (in - out)) ? -1 : 0; - __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". - __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); - __ SubS(FTMP, in, FTMP); - __ Mtc1(AT, half); - __ ColeS(half, FTMP); - - __ Bind(&add); - - __ Movf(TMP, ZERO); - - // Return out -= TMP. - __ Subu(out, out, TMP); - } - __ Bind(&done); -} - -// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) -void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); - locations->SetInAt(4, Location::RequiresRegister()); - - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Check assumption that sizeof(Char) is 2 (used in scaling below). 
- const size_t char_size = DataType::Size(DataType::Type::kUint16); - DCHECK_EQ(char_size, 2u); - const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); - - Register srcObj = locations->InAt(0).AsRegister<Register>(); - Register srcBegin = locations->InAt(1).AsRegister<Register>(); - Register srcEnd = locations->InAt(2).AsRegister<Register>(); - Register dstObj = locations->InAt(3).AsRegister<Register>(); - Register dstBegin = locations->InAt(4).AsRegister<Register>(); - - Register dstPtr = locations->GetTemp(0).AsRegister<Register>(); - Register srcPtr = locations->GetTemp(1).AsRegister<Register>(); - Register numChrs = locations->GetTemp(2).AsRegister<Register>(); - - MipsLabel done; - MipsLabel loop; - - // Location of data in char array buffer. - const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); - - // Get offset of value field within a string object. - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - - __ Beq(srcEnd, srcBegin, &done); // No characters to move. - - // Calculate number of characters to be copied. - __ Subu(numChrs, srcEnd, srcBegin); - - // Calculate destination address. - __ Addiu(dstPtr, dstObj, data_offset); - __ ShiftAndAdd(dstPtr, dstBegin, dstPtr, char_shift); - - if (mirror::kUseStringCompression) { - MipsLabel uncompressed_copy, compressed_loop; - const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - // Load count field and extract compression flag. - __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); - __ Sll(TMP, TMP, 31); - - // If string is uncompressed, use uncompressed path. - __ Bnez(TMP, &uncompressed_copy); - - // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. - __ Addu(srcPtr, srcObj, srcBegin); - __ Bind(&compressed_loop); - __ LoadFromOffset(kLoadUnsignedByte, TMP, srcPtr, value_offset); - __ StoreToOffset(kStoreHalfword, TMP, dstPtr, 0); - __ Addiu(numChrs, numChrs, -1); - __ Addiu(srcPtr, srcPtr, 1); - __ Addiu(dstPtr, dstPtr, 2); - __ Bnez(numChrs, &compressed_loop); - - __ B(&done); - __ Bind(&uncompressed_copy); - } - - // Calculate source address. 
- __ Addiu(srcPtr, srcObj, value_offset); - __ ShiftAndAdd(srcPtr, srcBegin, srcPtr, char_shift); - - __ Bind(&loop); - __ Lh(AT, srcPtr, 0); - __ Addiu(numChrs, numChrs, -1); - __ Addiu(srcPtr, srcPtr, char_size); - __ Sh(AT, dstPtr, 0); - __ Addiu(dstPtr, dstPtr, char_size); - __ Bnez(numChrs, &loop); - - __ Bind(&done); -} - -static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); -} - -static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); -} - -static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorMIPS* codegen, QuickEntrypointEnum entry) { - LocationSummary* locations = invoke->GetLocations(); - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - DCHECK_EQ(in, F12); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - DCHECK_EQ(out, F0); - - codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); -} - -static void GenFPFPToFPCall(HInvoke* invoke, - CodeGeneratorMIPS* codegen, - QuickEntrypointEnum entry) { - LocationSummary* locations = invoke->GetLocations(); - FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>(); - DCHECK_EQ(in0, F12); - FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>(); - DCHECK_EQ(in1, F14); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - DCHECK_EQ(out, F0); - - codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); -} - -// static double java.lang.Math.cos(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathCos(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCos); -} - -// static double java.lang.Math.sin(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathSin(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickSin); -} - -// static double java.lang.Math.acos(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAcos(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAcos); -} - -// static double java.lang.Math.asin(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAsin(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAsin); -} - -// static double java.lang.Math.atan(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void 
IntrinsicCodeGeneratorMIPS::VisitMathAtan(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAtan); -} - -// static double java.lang.Math.atan2(double y, double x) -void IntrinsicLocationsBuilderMIPS::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); -} - -// static double java.lang.Math.pow(double y, double x) -void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickPow); -} - -// static double java.lang.Math.cbrt(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathCbrt(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCbrt); -} - -// static double java.lang.Math.cosh(double x) -void IntrinsicLocationsBuilderMIPS::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathCosh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCosh); -} - -// static double java.lang.Math.exp(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathExp(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickExp); -} - -// static double java.lang.Math.expm1(double x) -void IntrinsicLocationsBuilderMIPS::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathExpm1(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickExpm1); -} - -// static double java.lang.Math.hypot(double x, double y) -void IntrinsicLocationsBuilderMIPS::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathHypot(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickHypot); -} - -// static double java.lang.Math.log(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathLog(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickLog); -} - -// static double java.lang.Math.log10(double x) -void IntrinsicLocationsBuilderMIPS::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathLog10(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickLog10); -} - -// static double java.lang.Math.nextAfter(double start, double direction) -void IntrinsicLocationsBuilderMIPS::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathNextAfter(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickNextAfter); -} - -// static double java.lang.Math.sinh(double x) -void IntrinsicLocationsBuilderMIPS::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathSinh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickSinh); -} - -// static double java.lang.Math.tan(double a) -void IntrinsicLocationsBuilderMIPS::VisitMathTan(HInvoke* invoke) { - 
CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathTan(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickTan); -} - -// static double java.lang.Math.tanh(double x) -void IntrinsicLocationsBuilderMIPS::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathTanh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickTanh); -} - -// static void java.lang.System.arraycopy(Object src, int srcPos, -// Object dest, int destPos, -// int length) -void IntrinsicLocationsBuilderMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); - - // As long as we are checking, we might as well check to see if the src and dest - // positions are >= 0. - if ((src_pos != nullptr && src_pos->GetValue() < 0) || - (dest_pos != nullptr && dest_pos->GetValue() < 0)) { - // We will have to fail anyways. - return; - } - - // And since we are already checking, check the length too. - if (length != nullptr) { - int32_t len = length->GetValue(); - if (len < 0) { - // Just call as normal. - return; - } - } - - // Okay, it is safe to generate inline code. - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); - // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); - locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); - - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -// Utility routine to verify that "length(input) - pos >= length" -static void EnoughItems(MipsAssembler* assembler, - Register length_input_minus_pos, - Location length, - SlowPathCodeMIPS* slow_path) { - if (length.IsConstant()) { - int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue(); - - if (IsInt<16>(length_constant)) { - __ Slti(TMP, length_input_minus_pos, length_constant); - __ Bnez(TMP, slow_path->GetEntryLabel()); - } else { - __ LoadConst32(TMP, length_constant); - __ Blt(length_input_minus_pos, TMP, slow_path->GetEntryLabel()); - } - } else { - __ Blt(length_input_minus_pos, length.AsRegister<Register>(), slow_path->GetEntryLabel()); - } -} - -static void CheckPosition(MipsAssembler* assembler, - Location pos, - Register input, - Location length, - SlowPathCodeMIPS* slow_path, - bool length_is_input_length = false) { - // Where is the length in the Array? - const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); - - // Calculate length(input) - pos. - if (pos.IsConstant()) { - int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); - if (pos_const == 0) { - if (!length_is_input_length) { - // Check that length(input) >= length. - __ LoadFromOffset(kLoadWord, AT, input, length_offset); - EnoughItems(assembler, AT, length, slow_path); - } - } else { - // Check that (length(input) - pos) >= zero. 
- __ LoadFromOffset(kLoadWord, AT, input, length_offset); - DCHECK_GT(pos_const, 0); - __ Addiu32(AT, AT, -pos_const, TMP); - __ Bltz(AT, slow_path->GetEntryLabel()); - - // Verify that (length(input) - pos) >= length. - EnoughItems(assembler, AT, length, slow_path); - } - } else if (length_is_input_length) { - // The only way the copy can succeed is if pos is zero. - Register pos_reg = pos.AsRegister<Register>(); - __ Bnez(pos_reg, slow_path->GetEntryLabel()); - } else { - // Verify that pos >= 0. - Register pos_reg = pos.AsRegister<Register>(); - __ Bltz(pos_reg, slow_path->GetEntryLabel()); - - // Check that (length(input) - pos) >= zero. - __ LoadFromOffset(kLoadWord, AT, input, length_offset); - __ Subu(AT, AT, pos_reg); - __ Bltz(AT, slow_path->GetEntryLabel()); - - // Verify that (length(input) - pos) >= length. - EnoughItems(assembler, AT, length, slow_path); - } -} - -void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register src = locations->InAt(0).AsRegister<Register>(); - Location src_pos = locations->InAt(1); - Register dest = locations->InAt(2).AsRegister<Register>(); - Location dest_pos = locations->InAt(3); - Location length = locations->InAt(4); - - MipsLabel loop; - - Register dest_base = locations->GetTemp(0).AsRegister<Register>(); - Register src_base = locations->GetTemp(1).AsRegister<Register>(); - Register count = locations->GetTemp(2).AsRegister<Register>(); - - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen_->AddSlowPath(slow_path); - - // Bail out if the source and destination are the same (to handle overlap). - __ Beq(src, dest, slow_path->GetEntryLabel()); - - // Bail out if the source is null. - __ Beqz(src, slow_path->GetEntryLabel()); - - // Bail out if the destination is null. - __ Beqz(dest, slow_path->GetEntryLabel()); - - // Load length into register for count. - if (length.IsConstant()) { - __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue()); - } else { - // If the length is negative, bail out. - // We have already checked in the LocationsBuilder for the constant case. - __ Bltz(length.AsRegister<Register>(), slow_path->GetEntryLabel()); - - __ Move(count, length.AsRegister<Register>()); - } - - // Validity checks: source. - CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path); - - // Validity checks: dest. - CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path); - - // If count is zero, we're done. - __ Beqz(count, slow_path->GetExitLabel()); - - // Okay, everything checks out. Finally time to do the copy. - // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = DataType::Size(DataType::Type::kUint16); - DCHECK_EQ(char_size, 2u); - - const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); - - const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); - - // Calculate source and destination addresses. 
- if (src_pos.IsConstant()) { - int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue(); - - __ Addiu32(src_base, src, data_offset + char_size * src_pos_const, TMP); - } else { - __ Addiu32(src_base, src, data_offset, TMP); - __ ShiftAndAdd(src_base, src_pos.AsRegister<Register>(), src_base, char_shift); - } - if (dest_pos.IsConstant()) { - int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - - __ Addiu32(dest_base, dest, data_offset + char_size * dest_pos_const, TMP); - } else { - __ Addiu32(dest_base, dest, data_offset, TMP); - __ ShiftAndAdd(dest_base, dest_pos.AsRegister<Register>(), dest_base, char_shift); - } - - __ Bind(&loop); - __ Lh(TMP, src_base, 0); - __ Addiu(src_base, src_base, char_size); - __ Addiu(count, count, -1); - __ Sh(TMP, dest_base, 0); - __ Addiu(dest_base, dest_base, char_size); - __ Bnez(count, &loop); - - __ Bind(slow_path->GetExitLabel()); -} - -// long java.lang.Integer.valueOf(long) -void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { - InvokeRuntimeCallingConvention calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - calling_convention.GetReturnLocation(DataType::Type::kReference), - Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); - LocationSummary* locations = invoke->GetLocations(); - MipsAssembler* assembler = GetAssembler(); - InstructionCodeGeneratorMIPS* icodegen = - down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); - - Register out = locations->Out().AsRegister<Register>(); - if (invoke->InputAt(0)->IsConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); - codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); - } else { - DCHECK(locations->CanCall()); - // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the - // JIT object table. - codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), - info.integer_boot_image_offset); - __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); - // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation - // one. - icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); - } - } else { - DCHECK(locations->CanCall()); - Register in = locations->InAt(0).AsRegister<Register>(); - MipsLabel allocate, done; - - __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < info.length ? - if (IsUint<15>(info.length)) { - __ Sltiu(AT, out, info.length); - } else { - __ LoadConst32(AT, info.length); - __ Sltu(AT, out, AT); - } - // Branch if out >= info.length. This means that "in" is outside of the valid range. - __ Beqz(AT, &allocate); - - // If the value is within the bounds, load the j.l.Integer directly from the array. 
- codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); - __ ShiftAndAdd(out, out, TMP, TIMES_4); - __ Lw(out, out, 0); - __ MaybeUnpoisonHeapReference(out); - __ B(&done); - - __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. - codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), - info.integer_boot_image_offset); - __ StoreToOffset(kStoreWord, in, out, info.value_offset); - // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation - // one. - icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); - __ Bind(&done); - } -} - -// static boolean java.lang.Thread.interrupted() -void IntrinsicLocationsBuilderMIPS::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS::VisitThreadInterrupted(HInvoke* invoke) { - MipsAssembler* assembler = GetAssembler(); - Register out = invoke->GetLocations()->Out().AsRegister<Register>(); - int32_t offset = Thread::InterruptedOffset<kMipsPointerSize>().Int32Value(); - __ LoadFromOffset(kLoadWord, out, TR, offset); - MipsLabel done; - __ Beqz(out, &done); - __ Sync(0); - __ StoreToOffset(kStoreWord, ZERO, TR, offset); - __ Sync(0); - __ Bind(&done); -} - -void IntrinsicLocationsBuilderMIPS::VisitReachabilityFence(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::Any()); -} - -void IntrinsicCodeGeneratorMIPS::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } - -// Unimplemented intrinsics. - -UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) -UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor) -UNIMPLEMENTED_INTRINSIC(MIPS, MathRint) -UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile); -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile); -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong) - -UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) - -UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) -UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer) - -UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderAppend); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderToString); - -// 1.8. 
-UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject) - -UNREACHABLE_INTRINSICS(MIPS) - -#undef __ - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h deleted file mode 100644 index 08d4e82139..0000000000 --- a/compiler/optimizing/intrinsics_mips.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_ - -#include "intrinsics.h" - -namespace art { - -class ArenaAllocator; -class HInvokeStaticOrDirect; -class HInvokeVirtual; - -namespace mips { - -class CodeGeneratorMIPS; -class MipsAssembler; - -class IntrinsicLocationsBuilderMIPS final : public IntrinsicVisitor { - public: - explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen); - - // Define visitor methods. - -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - - // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether - // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to - // the invoke. - bool TryDispatch(HInvoke* invoke); - - private: - CodeGeneratorMIPS* const codegen_; - ArenaAllocator* const allocator_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); -}; - -class IntrinsicCodeGeneratorMIPS final : public IntrinsicVisitor { - public: - explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {} - - // Define visitor methods. - -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - - bool IsR2OrNewer() const; - bool IsR6() const; - bool Is32BitFPU() const; - bool HasMsa() const; - - private: - MipsAssembler* GetAssembler(); - - ArenaAllocator* GetAllocator(); - - CodeGeneratorMIPS* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS); -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS_H_ diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc deleted file mode 100644 index 3e687652d3..0000000000 --- a/compiler/optimizing/intrinsics_mips64.cc +++ /dev/null @@ -1,2382 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "intrinsics_mips64.h" - -#include "arch/mips64/instruction_set_features_mips64.h" -#include "art_method.h" -#include "code_generator_mips64.h" -#include "entrypoints/quick/quick_entrypoints.h" -#include "heap_poisoning.h" -#include "intrinsics.h" -#include "mirror/array-inl.h" -#include "mirror/object_array-inl.h" -#include "mirror/string.h" -#include "scoped_thread_state_change-inl.h" -#include "thread.h" -#include "utils/mips64/assembler_mips64.h" -#include "utils/mips64/constants_mips64.h" - -namespace art { - -namespace mips64 { - -IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) - : codegen_(codegen), allocator_(codegen->GetGraph()->GetAllocator()) { -} - -Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { - return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler()); -} - -ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { - return codegen_->GetGraph()->GetAllocator(); -} - -inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { - return codegen_->GetInstructionSetFeatures().HasMsa(); -} - -#define __ codegen->GetAssembler()-> - -static void MoveFromReturnRegister(Location trg, - DataType::Type type, - CodeGeneratorMIPS64* codegen) { - if (!trg.IsValid()) { - DCHECK_EQ(type, DataType::Type::kVoid); - return; - } - - DCHECK_NE(type, DataType::Type::kVoid); - - if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { - GpuRegister trg_reg = trg.AsRegister<GpuRegister>(); - if (trg_reg != V0) { - __ Move(V0, trg_reg); - } - } else { - FpuRegister trg_reg = trg.AsFpuRegister<FpuRegister>(); - if (trg_reg != F0) { - if (type == DataType::Type::kFloat32) { - __ MovS(F0, trg_reg); - } else { - __ MovD(F0, trg_reg); - } - } - } -} - -static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { - InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor; - IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); -} - -// Slow-path for fallback (calling the managed code to handle the -// intrinsic) in an intrinsified call. 
This will copy the arguments -// into the positions for a regular call. -// -// Note: The actual parameters are required to be in the locations -// given by the invoke's location summary. If an intrinsic -// modifies those locations before a slowpath call, they must be -// restored! -class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { - public: - explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) - : SlowPathCodeMIPS64(invoke), invoke_(invoke) { } - - void EmitNativeCode(CodeGenerator* codegen_in) override { - CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); - - __ Bind(GetEntryLabel()); - - SaveLiveRegisters(codegen, invoke_->GetLocations()); - - MoveArguments(invoke_, codegen); - - if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall( - invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); - } else { - codegen->GenerateVirtualCall( - invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); - } - - // Copy the result back to the expected output. - Location out = invoke_->GetLocations()->Out(); - if (out.IsValid()) { - DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. - DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - MoveFromReturnRegister(out, invoke_->GetType(), codegen); - } - - RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ Bc(GetExitLabel()); - } - - const char* GetDescription() const override { return "IntrinsicSlowPathMIPS64"; } - - private: - // The instruction where this slow path is happening. - HInvoke* const invoke_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS64); -}; - -#undef __ - -bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { - Dispatch(invoke); - LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); -} - -#define __ assembler-> - -static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dmfc1(out, in); - } else { - __ Mfc1(out, in); - } -} - -// long java.lang.Double.doubleToRawLongBits(double) -void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -// int java.lang.Float.floatToRawIntBits(float) -void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - 
locations->SetOut(Location::RequiresFpuRegister()); -} - -static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ Dmtc1(in, out); - } else { - __ Mtc1(in, out); - } -} - -// double java.lang.Double.longBitsToDouble(long) -void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -// float java.lang.Float.intBitsToFloat(int) -void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenReverseBytes(LocationSummary* locations, - DataType::Type type, - Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - switch (type) { - case DataType::Type::kInt16: - __ Dsbh(out, in); - __ Seh(out, out); - break; - case DataType::Type::kInt32: - __ Rotr(out, in, 16); - __ Wsbh(out, out); - break; - case DataType::Type::kInt64: - __ Dsbh(out, in); - __ Dshd(out, out); - break; - default: - LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; - UNREACHABLE(); - } -} - -// int java.lang.Integer.reverseBytes(int) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); -} - -// long java.lang.Long.reverseBytes(long) -void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -// short java.lang.Short.reverseBytes(short) -void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); -} - -static void GenNumberOfLeadingZeroes(LocationSummary* locations, - bool is64bit, - Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dclz(out, in); - } else { - __ Clz(out, in); - } -} - -// int java.lang.Integer.numberOfLeadingZeros(int i) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - 
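For reference, the GenReverseBytes lowering quoted above maps java.lang.Integer.reverseBytes onto a 16-bit rotate (Rotr) followed by a byte swap within each halfword (Wsbh). Below is a minimal portable C++ sketch of that same combination, assuming plain uint32_t arithmetic; the helper names are illustrative only and do not exist in ART.

#include <cstdint>

// Rotate a 32-bit value right by n bits, 0 < n < 32 (what the MIPS ROTR instruction computes).
static uint32_t RotateRight32(uint32_t v, unsigned n) {
  return (v >> n) | (v << (32u - n));
}

// Swap the two bytes inside each 16-bit halfword (what WSBH computes).
static uint32_t SwapBytesWithinHalfwords(uint32_t v) {
  return ((v & 0x00ff00ffu) << 8) | ((v >> 8) & 0x00ff00ffu);
}

// Sketch of the kInt32 case above: Rotr(out, in, 16) then Wsbh(out, out).
static uint32_t ReverseBytes32(uint32_t v) {
  return SwapBytesWithinHalfwords(RotateRight32(v, 16));
}

// Example: ReverseBytes32(0x12345678u) == 0x78563412u.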
-void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -// int java.lang.Long.numberOfLeadingZeros(long i) -void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -static void GenNumberOfTrailingZeroes(LocationSummary* locations, - bool is64bit, - Mips64Assembler* assembler) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - if (is64bit) { - __ Dsbh(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>()); - __ Dshd(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - __ Dbitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - __ Dclz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - } else { - __ Rotr(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>(), 16); - __ Wsbh(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - __ Bitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - __ Clz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); - } -} - -// int java.lang.Integer.numberOfTrailingZeros(int i) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -// int java.lang.Long.numberOfTrailingZeros(long i) -void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -static void GenReverse(LocationSummary* locations, - DataType::Type type, - Mips64Assembler* assembler) { - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (type == DataType::Type::kInt32) { - __ Rotr(out, in, 16); - __ Wsbh(out, out); - __ Bitswap(out, out); - } else { - __ Dsbh(out, in); - __ Dshd(out, out); - __ Dbitswap(out, out); - } -} - -// int java.lang.Integer.reverse(int) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); -} - -// long java.lang.Long.reverse(long) -void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - 
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -static void GenBitCount(LocationSummary* locations, - const DataType::Type type, - const bool hasMsa, - Mips64Assembler* assembler) { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - - // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - // - // A generalization of the best bit counting method to integers of - // bit-widths up to 128 (parameterized by type T) is this: - // - // v = v - ((v >> 1) & (T)~(T)0/3); // temp - // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp - // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp - // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count - // - // For comparison, for 32-bit quantities, this algorithm can be executed - // using 20 MIPS instructions (the calls to LoadConst32() generate two - // machine instructions each for the values being used in this algorithm). - // A(n unrolled) loop-based algorithm requires 25 instructions. - // - // For a 64-bit operand this can be performed in 24 instructions compared - // to a(n unrolled) loop based algorithm which requires 38 instructions. - // - // There are algorithms which are faster in the cases where very few - // bits are set but the algorithm here attempts to minimize the total - // number of instructions executed even when a large number of bits - // are set. - if (hasMsa) { - if (type == DataType::Type::kInt32) { - __ Mtc1(in, FTMP); - __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); - __ Mfc1(out, FTMP); - } else { - __ Dmtc1(in, FTMP); - __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); - __ Dmfc1(out, FTMP); - } - } else { - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); - } - } -} - -// int java.lang.Integer.bitCount(int) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); -} - -// int java.lang.Long.bitCount(long) -void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); -} - -// 
double java.lang.Math.sqrt(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - Mips64Assembler* assembler = GetAssembler(); - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - __ SqrtD(out, in); -} - -static void CreateFPToFP(ArenaAllocator* allocator, - HInvoke* invoke, - Location::OutputOverlap overlaps = Location::kOutputOverlap) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), overlaps); -} - -// double java.lang.Math.rint(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) { - CreateFPToFP(allocator_, invoke, Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - Mips64Assembler* assembler = GetAssembler(); - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - __ RintD(out, in); -} - -// double java.lang.Math.floor(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) { - CreateFPToFP(allocator_, invoke); -} - -const constexpr uint16_t kFPLeaveUnchanged = kPositiveZero | - kPositiveInfinity | - kNegativeZero | - kNegativeInfinity | - kQuietNaN | - kSignalingNaN; - -enum FloatRoundingMode { - kFloor, - kCeil, -}; - -static void GenRoundingMode(LocationSummary* locations, - FloatRoundingMode mode, - Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - DCHECK_NE(in, out); - - Mips64Label done; - - // double floor/ceil(double in) { - // if in.isNaN || in.isInfinite || in.isZero { - // return in; - // } - __ ClassD(out, in); - __ Dmfc1(AT, out); - __ Andi(AT, AT, kFPLeaveUnchanged); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN - __ MovD(out, in); - __ Bnezc(AT, &done); - - // Long outLong = floor/ceil(in); - // if (outLong == Long.MAX_VALUE) || (outLong == Long.MIN_VALUE) { - // // floor()/ceil() has almost certainly returned a value - // // which can't be successfully represented as a signed - // // 64-bit number. Java expects that the input value will - // // be returned in these cases. - // // There is also a small probability that floor(in)/ceil(in) - // // correctly truncates/rounds up the input value to - // // Long.MAX_VALUE or Long.MIN_VALUE. In these cases, this - // // exception handling code still does the correct thing. - // return in; - // } - if (mode == kFloor) { - __ FloorLD(out, in); - } else if (mode == kCeil) { - __ CeilLD(out, in); - } - __ Dmfc1(AT, out); - __ MovD(out, in); - __ Daddiu(TMP, AT, 1); - __ Dati(TMP, 0x8000); // TMP = AT + 0x8000 0000 0000 0001 - // or AT - 0x7FFF FFFF FFFF FFFF. - // IOW, TMP = 1 if AT = Long.MIN_VALUE - // or TMP = 0 if AT = Long.MAX_VALUE. - __ Dsrl(TMP, TMP, 1); // TMP = 0 if AT = Long.MIN_VALUE - // or AT = Long.MAX_VALUE. 
- __ Beqzc(TMP, &done); - - // double out = outLong; - // return out; - __ Dmtc1(AT, out); - __ Cvtdl(out, out); - __ Bind(&done); - // } -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) { - GenRoundingMode(invoke->GetLocations(), kFloor, GetAssembler()); -} - -// double java.lang.Math.ceil(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) { - CreateFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) { - GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler()); -} - -static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, DataType::Type type) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - DCHECK(type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64); - - Mips64Label done; - - // out = floor(in); - // - // if (out != MAX_VALUE && out != MIN_VALUE) { - // TMP = ((in - out) >= 0.5) ? 1 : 0; - // return out += TMP; - // } - // return out; - - // out = floor(in); - if (type == DataType::Type::kFloat64) { - __ FloorLD(FTMP, in); - __ Dmfc1(out, FTMP); - } else { - __ FloorWS(FTMP, in); - __ Mfc1(out, FTMP); - } - - // if (out != MAX_VALUE && out != MIN_VALUE) - if (type == DataType::Type::kFloat64) { - __ Daddiu(TMP, out, 1); - __ Dati(TMP, 0x8000); // TMP = out + 0x8000 0000 0000 0001 - // or out - 0x7FFF FFFF FFFF FFFF. - // IOW, TMP = 1 if out = Long.MIN_VALUE - // or TMP = 0 if out = Long.MAX_VALUE. - __ Dsrl(TMP, TMP, 1); // TMP = 0 if out = Long.MIN_VALUE - // or out = Long.MAX_VALUE. - __ Beqzc(TMP, &done); - } else { - __ Addiu(TMP, out, 1); - __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001 - // or out - 0x7FFF FFFF. - // IOW, TMP = 1 if out = Int.MIN_VALUE - // or TMP = 0 if out = Int.MAX_VALUE. - __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE - // or out = Int.MAX_VALUE. - __ Beqzc(TMP, &done); - } - - // TMP = (0.5 <= (in - out)) ? -1 : 0; - if (type == DataType::Type::kFloat64) { - __ Cvtdl(FTMP, FTMP); // Convert output of floor.l.d back to "double". - __ LoadConst64(AT, bit_cast<int64_t, double>(0.5)); - __ SubD(FTMP, in, FTMP); - __ Dmtc1(AT, half); - __ CmpLeD(FTMP, half, FTMP); - __ Dmfc1(TMP, FTMP); - } else { - __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". - __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); - __ SubS(FTMP, in, FTMP); - __ Mtc1(AT, half); - __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(TMP, FTMP); - } - - // Return out -= TMP. 
- if (type == DataType::Type::kFloat64) { - __ Dsubu(out, out, TMP); - } else { - __ Subu(out, out, TMP); - } - - __ Bind(&done); -} - -// int java.lang.Math.round(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) { - GenRound(invoke->GetLocations(), GetAssembler(), DataType::Type::kFloat32); -} - -// long java.lang.Math.round(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) { - GenRound(invoke->GetLocations(), GetAssembler(), DataType::Type::kFloat64); -} - -// byte libcore.io.Memory.peekByte(long address) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - - __ Lb(out, adr, 0); -} - -// short libcore.io.Memory.peekShort(long address) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - - __ Lh(out, adr, 0); -} - -// int libcore.io.Memory.peekInt(long address) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - - __ Lw(out, adr, 0); -} - -// long libcore.io.Memory.peekLong(long address) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - - __ Ld(out, adr, 0); -} - -static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); -} - 
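// Aside (illustrative only, not part of this patch): the Daddiu/Dati/Dsrl
// sequence used by GenRoundingMode and GenRound above is a branch-free test for
// the two saturation values of floor.l.d/ceil.l.d, i.e. Long.MIN_VALUE and
// Long.MAX_VALUE. An architecture-neutral C++ sketch of the same check; the
// helper name is made up for the example:

#include <cstdint>

// True iff v is INT64_MIN or INT64_MAX.
static bool IsInt64MinOrMax(int64_t v) {
  // v + 0x8000000000000001 (mod 2^64) is 1 for INT64_MIN, 0 for INT64_MAX, and
  // at least 2 for every other value, so one unsigned shift finishes the test.
  uint64_t biased = static_cast<uint64_t>(v) + UINT64_C(0x8000000000000001);
  return (biased >> 1) == 0;
}

// When the test fires, GenRoundingMode returns the original floating-point input
// (which is already integral at that magnitude, so Math.floor/Math.ceil semantics
// are preserved) and GenRound keeps the saturated long result.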
-// void libcore.io.Memory.pokeByte(long address, byte value) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); - - __ Sb(val, adr, 0); -} - -// void libcore.io.Memory.pokeShort(long address, short value) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); - - __ Sh(val, adr, 0); -} - -// void libcore.io.Memory.pokeInt(long address, int value) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); - - __ Sw(val, adr, 00); -} - -// void libcore.io.Memory.pokeLong(long address, long value) -void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); - - __ Sd(val, adr, 0); -} - -// Thread java.lang.Thread.currentThread() -void IntrinsicLocationsBuilderMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - - __ LoadFromOffset(kLoadUnsignedWord, - out, - TR, - Thread::PeerOffset<kMips64PointerSize>().Int32Value()); -} - -static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, - HInvoke* invoke, - DataType::Type type) { - bool can_call = kEmitCompilerReadBarrier && - (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || - invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = - new (allocator) LocationSummary(invoke, - can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); - if (can_call && kUseBakerReadBarrier) { - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - } - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), - (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); - } -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenUnsafeGet(HInvoke* invoke, - DataType::Type type, - bool is_volatile, - CodeGeneratorMIPS64* codegen) { - LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == DataType::Type::kInt32) || - (type == DataType::Type::kInt64) || - (type == DataType::Type::kReference)) << type; - Mips64Assembler* assembler = codegen->GetAssembler(); - // Target register. - Location trg_loc = locations->Out(); - GpuRegister trg = trg_loc.AsRegister<GpuRegister>(); - // Object pointer. - Location base_loc = locations->InAt(1); - GpuRegister base = base_loc.AsRegister<GpuRegister>(); - // Long offset. - Location offset_loc = locations->InAt(2); - GpuRegister offset = offset_loc.AsRegister<GpuRegister>(); - - if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == DataType::Type::kReference))) { - __ Daddu(TMP, base, offset); - } - - switch (type) { - case DataType::Type::kInt64: - __ Ld(trg, TMP, 0); - if (is_volatile) { - __ Sync(0); - } - break; - - case DataType::Type::kInt32: - __ Lw(trg, TMP, 0); - if (is_volatile) { - __ Sync(0); - } - break; - - case DataType::Type::kReference: - if (kEmitCompilerReadBarrier) { - if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); - codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, - trg_loc, - base, - /* offset= */ 0U, - /* index= */ offset_loc, - TIMES_1, - temp, - /* needs_null_check= */ false); - if (is_volatile) { - __ Sync(0); - } - } else { - __ Lwu(trg, TMP, 0); - if (is_volatile) { - __ Sync(0); - } - codegen->GenerateReadBarrierSlow(invoke, - trg_loc, - trg_loc, - base_loc, - /* offset= */ 0U, - /* index= */ offset_loc); - } - } else { - __ Lwu(trg, TMP, 0); - if (is_volatile) { - __ Sync(0); - } - __ MaybeUnpoisonHeapReference(trg); - } - break; - - default: - LOG(FATAL) << "Unsupported op size " << type; - UNREACHABLE(); - } -} - -// int sun.misc.Unsafe.getInt(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); -} - -// int sun.misc.Unsafe.getIntVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); -} - -// long sun.misc.Unsafe.getLong(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); -} - -// long 
sun.misc.Unsafe.getLongVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); -} - -// Object sun.misc.Unsafe.getObject(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); -} - -// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); -} - -static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenUnsafePut(LocationSummary* locations, - DataType::Type type, - bool is_volatile, - bool is_ordered, - CodeGeneratorMIPS64* codegen) { - DCHECK((type == DataType::Type::kInt32) || - (type == DataType::Type::kInt64) || - (type == DataType::Type::kReference)); - Mips64Assembler* assembler = codegen->GetAssembler(); - // Object pointer. - GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); - // Long offset. - GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>(); - GpuRegister value = locations->InAt(3).AsRegister<GpuRegister>(); - - __ Daddu(TMP, base, offset); - if (is_volatile || is_ordered) { - __ Sync(0); - } - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kReference: - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ PoisonHeapReference(AT, value); - __ Sw(AT, TMP, 0); - } else { - __ Sw(value, TMP, 0); - } - break; - - case DataType::Type::kInt64: - __ Sd(value, TMP, 0); - break; - - default: - LOG(FATAL) << "Unsupported op size " << type; - UNREACHABLE(); - } - if (is_volatile) { - __ Sync(0); - } - - if (type == DataType::Type::kReference) { - bool value_can_be_null = true; // TODO: Worth finding out this information? 
- codegen->MarkGCCard(base, value, value_can_be_null); - } -} - -// void sun.misc.Unsafe.putInt(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ false, - /* is_ordered= */ false, - codegen_); -} - -// void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ false, - /* is_ordered= */ true, - codegen_); -} - -// void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt32, - /* is_volatile= */ true, - /* is_ordered= */ false, - codegen_); -} - -// void sun.misc.Unsafe.putObject(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ false, - /* is_ordered= */ false, - codegen_); -} - -// void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ false, - /* is_ordered= */ true, - codegen_); -} - -// void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kReference, - /* is_volatile= */ true, - /* is_ordered= */ false, - codegen_); -} - -// void sun.misc.Unsafe.putLong(Object o, long offset, long x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt64, - /* is_volatile= */ false, - /* is_ordered= */ false, - codegen_); -} - -// void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt64, - /* is_volatile= */ false, - /* is_ordered= */ true, - codegen_); -} - -// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x) -void 
IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - DataType::Type::kInt64, - /* is_volatile= */ true, - /* is_ordered= */ false, - codegen_); -} - -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && - kUseBakerReadBarrier && - (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = - new (allocator) LocationSummary(invoke, - can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::NoLocation()); // Unused receiver. - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); - locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - - // Temporary register used in CAS by (Baker) read barrier. - if (can_call) { - locations->AddTemp(Location::RequiresRegister()); - } -} - -// Note that the caller must supply a properly aligned memory address. -// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS64* codegen) { - Mips64Assembler* assembler = codegen->GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); - Location offset_loc = locations->InAt(2); - GpuRegister offset = offset_loc.AsRegister<GpuRegister>(); - GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>(); - GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>(); - Location out_loc = locations->Out(); - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - - DCHECK_NE(base, out); - DCHECK_NE(offset, out); - DCHECK_NE(expected, out); - - if (type == DataType::Type::kReference) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - // Mark card for object assuming new value is stored. Worst case we will mark an unchanged - // object and scan the receiver at the next GC for nothing. - bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(base, value, value_can_be_null); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* offset= */ 0u, - /* index= */ offset_loc, - ScaleFactor::TIMES_1, - temp, - /* needs_null_check= */ false, - /* always_update_field= */ true); - } - } - - Mips64Label loop_head, exit_loop; - __ Daddu(TMP, base, offset); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ PoisonHeapReference(expected); - // Do not poison `value`, if it is the same register as - // `expected`, which has just been poisoned. 
- if (value != expected) { - __ PoisonHeapReference(value); - } - } - - // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - // result = tmp_value != 0; - - __ Sync(0); - __ Bind(&loop_head); - if (type == DataType::Type::kInt64) { - __ Lld(out, TMP); - } else { - // Note: We will need a read barrier here, when read barrier - // support is added to the MIPS64 back end. - __ Ll(out, TMP); - if (type == DataType::Type::kReference) { - // The LL instruction sign-extends the 32-bit value, but - // 32-bit references must be zero-extended. Zero-extend `out`. - __ Dext(out, out, 0, 32); - } - } - __ Dsubu(out, out, expected); // If we didn't get the 'expected' - __ Sltiu(out, out, 1); // value, set 'out' to false, and - __ Beqzc(out, &exit_loop); // return. - __ Move(out, value); // Use 'out' for the 'store conditional' instruction. - // If we use 'value' directly, we would lose 'value' - // in the case that the store fails. Whether the - // store succeeds, or fails, it will load the - // correct Boolean value into the 'out' register. - if (type == DataType::Type::kInt64) { - __ Scd(out, TMP); - } else { - __ Sc(out, TMP); - } - __ Beqzc(out, &loop_head); // If we couldn't do the read-modify-write - // cycle atomically then retry. - __ Bind(&exit_loop); - __ Sync(0); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - __ UnpoisonHeapReference(expected); - // Do not unpoison `value`, if it is the same register as - // `expected`, which has just been unpoisoned. - if (value != expected) { - __ UnpoisonHeapReference(value); - } - } -} - -// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, DataType::Type::kInt32, codegen_); -} - -// boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke, DataType::Type::kInt64, codegen_); -} - -// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) -void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { - return; - } - - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. 
- DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - GenCas(invoke, DataType::Type::kReference, codegen_); -} - -// int java.lang.String.compareTo(String anotherString) -void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - GpuRegister argument = locations->InAt(1).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqzc(argument, slow_path->GetEntryLabel()); - - codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); - __ Bind(slow_path->GetExitLabel()); -} - -// boolean java.lang.String.equals(Object anObject) -void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - - // Temporary registers to store lengths of strings and for calculations. - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); - GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); - GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); - - Mips64Label loop; - Mips64Label end; - Mips64Label return_true; - Mips64Label return_false; - - // Get offsets of count, value, and class fields within a string object. - const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - // If the register containing the pointer to "this", and the register - // containing the pointer to "anObject" are the same register then - // "this", and "anObject" are the same object and we can - // short-circuit the logic to a true result. 
- if (str == arg) { - __ LoadConst64(out, 1); - return; - } - - StringEqualsOptimizations optimizations(invoke); - if (!optimizations.GetArgumentNotNull()) { - // Check if input is null, return false if it is. - __ Beqzc(arg, &return_false); - } - - // Reference equality check, return true if same reference. - __ Beqc(str, arg, &return_true); - - if (!optimizations.GetArgumentIsString()) { - // Instanceof check for the argument by comparing class fields. - // All string objects must have the same type since String cannot be subclassed. - // Receiver must be a string object, so its class field is equal to all strings' class fields. - // If the argument is a string object, its class field must be equal to receiver's class field. - // - // As the String class is expected to be non-movable, we can read the class - // field from String.equals' arguments without read barriers. - AssertNonMovableStringClass(); - // /* HeapReference<Class> */ temp1 = str->klass_ - __ Lw(temp1, str, class_offset); - // /* HeapReference<Class> */ temp2 = arg->klass_ - __ Lw(temp2, arg, class_offset); - // Also, because we use the previously loaded class references only in the - // following comparison, we don't need to unpoison them. - __ Bnec(temp1, temp2, &return_false); - } - - // Load `count` fields of this and argument strings. - __ Lw(temp1, str, count_offset); - __ Lw(temp2, arg, count_offset); - // Check if `count` fields are equal, return false if they're not. - // Also compares the compression style, if differs return false. - __ Bnec(temp1, temp2, &return_false); - // Return true if both strings are empty. Even with string compression `count == 0` means empty. - static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - __ Beqzc(temp1, &return_true); - - // Don't overwrite input registers - __ Move(TMP, str); - __ Move(temp3, arg); - - // Assertions that must hold in order to compare strings 8 bytes at a time. - DCHECK_ALIGNED(value_offset, 8); - static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); - - if (mirror::kUseStringCompression) { - // For string compression, calculate the number of bytes to compare (not chars). - __ Dext(temp2, temp1, 0, 1); // Extract compression flag. - __ Srl(temp1, temp1, 1); // Extract length. - __ Sllv(temp1, temp1, temp2); // Double the byte count if uncompressed. - } - - // Loop to compare strings 8 bytes at a time starting at the beginning of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. - __ Bind(&loop); - __ Ld(out, TMP, value_offset); - __ Ld(temp2, temp3, value_offset); - __ Bnec(out, temp2, &return_false); - __ Daddiu(TMP, TMP, 8); - __ Daddiu(temp3, temp3, 8); - // With string compression, we have compared 8 bytes, otherwise 4 chars. - __ Addiu(temp1, temp1, mirror::kUseStringCompression ? -8 : -4); - __ Bgtzc(temp1, &loop); - - // Return true and exit the function. - // If loop does not result in returning false, we return true. - __ Bind(&return_true); - __ LoadConst64(out, 1); - __ Bc(&end); - - // Return false and exit the function. - __ Bind(&return_false); - __ LoadConst64(out, 0); - __ Bind(&end); -} - -static void GenerateStringIndexOf(HInvoke* invoke, - Mips64Assembler* assembler, - CodeGeneratorMIPS64* codegen, - bool start_at_zero) { - LocationSummary* locations = invoke->GetLocations(); - GpuRegister tmp_reg = start_at_zero ? 
locations->GetTemp(0).AsRegister<GpuRegister>() : TMP; - - // Note that the null check must have been done earlier. - DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); - - // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, - // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. - SlowPathCodeMIPS64* slow_path = nullptr; - HInstruction* code_point = invoke->InputAt(1); - if (code_point->IsIntConstant()) { - if (!IsUint<16>(code_point->AsIntConstant()->GetValue())) { - // Always needs the slow-path. We could directly dispatch to it, - // but this case should be rare, so for simplicity just put the - // full slow-path down and branch unconditionally. - slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen->AddSlowPath(slow_path); - __ Bc(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - return; - } - } else if (code_point->GetType() != DataType::Type::kUint16) { - GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>(); - __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max()); - slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen->AddSlowPath(slow_path); - __ Bltuc(tmp_reg, char_reg, slow_path->GetEntryLabel()); // UTF-16 required - } - - if (start_at_zero) { - DCHECK_EQ(tmp_reg, A2); - // Start-index = 0. - __ Clear(tmp_reg); - } - - codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); - CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); - - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } -} - -// int java.lang.String.indexOf(int ch) -void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - // We have a hand-crafted assembly stub that follows the runtime - // calling convention. So it's best to align the inputs accordingly. - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); - - // Need a temp for slow-path codepoint compare, and need to send start-index=0. - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); -} - -// int java.lang.String.indexOf(int ch, int fromIndex) -void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - // We have a hand-crafted assembly stub that follows the runtime - // calling convention. So it's best to align the inputs accordingly. 
- InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); -} - -// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) -void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - GpuRegister byte_array = locations->InAt(0).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqzc(byte_array, slow_path->GetEntryLabel()); - - codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); - CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); - __ Bind(slow_path->GetExitLabel()); -} - -// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) -void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { - // No need to emit code checking whether `locations->InAt(2)` is a null - // pointer, as callers of the native method - // - // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) - // - // all include a null check on `data` before calling that method. 
- codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); -} - -// java.lang.StringFactory.newStringFromString(String toCopy) -void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (allocator_) LocationSummary( - invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); - locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - GpuRegister string_to_copy = locations->InAt(0).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen_->AddSlowPath(slow_path); - __ Beqzc(string_to_copy, slow_path->GetEntryLabel()); - - codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); - CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); - __ Bind(slow_path->GetExitLabel()); -} - -static void GenIsInfinite(LocationSummary* locations, - bool is64bit, - Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ ClassD(FTMP, in); - } else { - __ ClassS(FTMP, in); - } - __ Mfc1(out, FTMP); - __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); - __ Sltu(out, ZERO, out); -} - -// boolean java.lang.Float.isInfinite(float) -void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); -} - -// boolean java.lang.Double.isInfinite(double) -void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); -} - -// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) -void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RequiresRegister()); - locations->SetInAt(4, Location::RequiresRegister()); - - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Check assumption that sizeof(Char) is 2 (used in scaling below). 
- const size_t char_size = DataType::Size(DataType::Type::kUint16); - DCHECK_EQ(char_size, 2u); - const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); - - GpuRegister srcObj = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister srcBegin = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister srcEnd = locations->InAt(2).AsRegister<GpuRegister>(); - GpuRegister dstObj = locations->InAt(3).AsRegister<GpuRegister>(); - GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>(); - - GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>(); - GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>(); - GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>(); - - Mips64Label done; - Mips64Label loop; - - // Location of data in char array buffer. - const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); - - // Get offset of value field within a string object. - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - - __ Beqc(srcEnd, srcBegin, &done); // No characters to move. - - // Calculate number of characters to be copied. - __ Dsubu(numChrs, srcEnd, srcBegin); - - // Calculate destination address. - __ Daddiu(dstPtr, dstObj, data_offset); - __ Dlsa(dstPtr, dstBegin, dstPtr, char_shift); - - if (mirror::kUseStringCompression) { - Mips64Label uncompressed_copy, compressed_loop; - const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - // Load count field and extract compression flag. - __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); - __ Dext(TMP, TMP, 0, 1); - - // If string is uncompressed, use uncompressed path. - __ Bnezc(TMP, &uncompressed_copy); - - // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. - __ Daddu(srcPtr, srcObj, srcBegin); - __ Bind(&compressed_loop); - __ LoadFromOffset(kLoadUnsignedByte, TMP, srcPtr, value_offset); - __ StoreToOffset(kStoreHalfword, TMP, dstPtr, 0); - __ Daddiu(numChrs, numChrs, -1); - __ Daddiu(srcPtr, srcPtr, 1); - __ Daddiu(dstPtr, dstPtr, 2); - __ Bnezc(numChrs, &compressed_loop); - - __ Bc(&done); - __ Bind(&uncompressed_copy); - } - - // Calculate source address. - __ Daddiu(srcPtr, srcObj, value_offset); - __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift); - - __ Bind(&loop); - __ Lh(AT, srcPtr, 0); - __ Daddiu(numChrs, numChrs, -1); - __ Daddiu(srcPtr, srcPtr, char_size); - __ Sh(AT, dstPtr, 0); - __ Daddiu(dstPtr, dstPtr, char_size); - __ Bnezc(numChrs, &loop); - - __ Bind(&done); -} - -// static void java.lang.System.arraycopy(Object src, int srcPos, -// Object dest, int destPos, -// int length) -void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); - - // As long as we are checking, we might as well check to see if the src and dest - // positions are >= 0. - if ((src_pos != nullptr && src_pos->GetValue() < 0) || - (dest_pos != nullptr && dest_pos->GetValue() < 0)) { - // We will have to fail anyways. - return; - } - - // And since we are already checking, check the length too. - if (length != nullptr) { - int32_t len = length->GetValue(); - if (len < 0) { - // Just call as normal. - return; - } - } - - // Okay, it is safe to generate inline code. 
- LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); - // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); - locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); - - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -// Utility routine to verify that "length(input) - pos >= length" -static void EnoughItems(Mips64Assembler* assembler, - GpuRegister length_input_minus_pos, - Location length, - SlowPathCodeMIPS64* slow_path) { - if (length.IsConstant()) { - int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue(); - - if (IsInt<16>(length_constant)) { - __ Slti(TMP, length_input_minus_pos, length_constant); - __ Bnezc(TMP, slow_path->GetEntryLabel()); - } else { - __ LoadConst32(TMP, length_constant); - __ Bltc(length_input_minus_pos, TMP, slow_path->GetEntryLabel()); - } - } else { - __ Bltc(length_input_minus_pos, length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - } -} - -static void CheckPosition(Mips64Assembler* assembler, - Location pos, - GpuRegister input, - Location length, - SlowPathCodeMIPS64* slow_path, - bool length_is_input_length = false) { - // Where is the length in the Array? - const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); - - // Calculate length(input) - pos. - if (pos.IsConstant()) { - int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); - if (pos_const == 0) { - if (!length_is_input_length) { - // Check that length(input) >= length. - __ LoadFromOffset(kLoadWord, AT, input, length_offset); - EnoughItems(assembler, AT, length, slow_path); - } - } else { - // Check that (length(input) - pos) >= zero. - __ LoadFromOffset(kLoadWord, AT, input, length_offset); - DCHECK_GT(pos_const, 0); - __ Addiu32(AT, AT, -pos_const); - __ Bltzc(AT, slow_path->GetEntryLabel()); - - // Verify that (length(input) - pos) >= length. - EnoughItems(assembler, AT, length, slow_path); - } - } else if (length_is_input_length) { - // The only way the copy can succeed is if pos is zero. - GpuRegister pos_reg = pos.AsRegister<GpuRegister>(); - __ Bnezc(pos_reg, slow_path->GetEntryLabel()); - } else { - // Verify that pos >= 0. - GpuRegister pos_reg = pos.AsRegister<GpuRegister>(); - __ Bltzc(pos_reg, slow_path->GetEntryLabel()); - - // Check that (length(input) - pos) >= zero. - __ LoadFromOffset(kLoadWord, AT, input, length_offset); - __ Subu(AT, AT, pos_reg); - __ Bltzc(AT, slow_path->GetEntryLabel()); - - // Verify that (length(input) - pos) >= length. 
- EnoughItems(assembler, AT, length, slow_path); - } -} - -void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - Location src_pos = locations->InAt(1); - GpuRegister dest = locations->InAt(2).AsRegister<GpuRegister>(); - Location dest_pos = locations->InAt(3); - Location length = locations->InAt(4); - - Mips64Label loop; - - GpuRegister dest_base = locations->GetTemp(0).AsRegister<GpuRegister>(); - GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>(); - GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>(); - - SlowPathCodeMIPS64* slow_path = - new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen_->AddSlowPath(slow_path); - - // Bail out if the source and destination are the same (to handle overlap). - __ Beqc(src, dest, slow_path->GetEntryLabel()); - - // Bail out if the source is null. - __ Beqzc(src, slow_path->GetEntryLabel()); - - // Bail out if the destination is null. - __ Beqzc(dest, slow_path->GetEntryLabel()); - - // Load length into register for count. - if (length.IsConstant()) { - __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue()); - } else { - // If the length is negative, bail out. - // We have already checked in the LocationsBuilder for the constant case. - __ Bltzc(length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - - __ Move(count, length.AsRegister<GpuRegister>()); - } - - // Validity checks: source. - CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path); - - // Validity checks: dest. - CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path); - - // If count is zero, we're done. - __ Beqzc(count, slow_path->GetExitLabel()); - - // Okay, everything checks out. Finally time to do the copy. - // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = DataType::Size(DataType::Type::kUint16); - DCHECK_EQ(char_size, 2u); - - const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); - - const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); - - // Calculate source and destination addresses. 
- if (src_pos.IsConstant()) { - int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue(); - - __ Daddiu64(src_base, src, data_offset + char_size * src_pos_const, TMP); - } else { - __ Daddiu64(src_base, src, data_offset, TMP); - __ Dlsa(src_base, src_pos.AsRegister<GpuRegister>(), src_base, char_shift); - } - if (dest_pos.IsConstant()) { - int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - - __ Daddiu64(dest_base, dest, data_offset + char_size * dest_pos_const, TMP); - } else { - __ Daddiu64(dest_base, dest, data_offset, TMP); - __ Dlsa(dest_base, dest_pos.AsRegister<GpuRegister>(), dest_base, char_shift); - } - - __ Bind(&loop); - __ Lh(TMP, src_base, 0); - __ Daddiu(src_base, src_base, char_size); - __ Daddiu(count, count, -1); - __ Sh(TMP, dest_base, 0); - __ Daddiu(dest_base, dest_base, char_size); - __ Bnezc(count, &loop); - - __ Bind(slow_path->GetExitLabel()); -} - -static void GenHighestOneBit(LocationSummary* locations, - DataType::Type type, - Mips64Assembler* assembler) { - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; - - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (type == DataType::Type::kInt64) { - __ Dclz(TMP, in); - __ LoadConst64(AT, INT64_C(0x8000000000000000)); - __ Dsrlv(AT, AT, TMP); - } else { - __ Clz(TMP, in); - __ LoadConst32(AT, 0x80000000); - __ Srlv(AT, AT, TMP); - } - // For either value of "type", when "in" is zero, "out" should also - // be zero. Without this extra "and" operation, when "in" is zero, - // "out" would be either Integer.MIN_VALUE, or Long.MIN_VALUE because - // the MIPS logical shift operations "dsrlv", and "srlv" don't use - // the shift amount (TMP) directly; they use either (TMP % 64) or - // (TMP % 32), respectively. 
- __ And(out, AT, in); -} - -// int java.lang.Integer.highestOneBit(int) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); -} - -// long java.lang.Long.highestOneBit(long) -void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void GenLowestOneBit(LocationSummary* locations, - DataType::Type type, - Mips64Assembler* assembler) { - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; - - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (type == DataType::Type::kInt64) { - __ Dsubu(TMP, ZERO, in); - } else { - __ Subu(TMP, ZERO, in); - } - __ And(out, TMP, in); -} - -// int java.lang.Integer.lowestOneBit(int) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); -} - -// long java.lang.Long.lowestOneBit(long) -void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); -} - -static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - InvokeRuntimeCallingConvention calling_convention; - - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); -} - -static void GenFPToFPCall(HInvoke* invoke, - CodeGeneratorMIPS64* codegen, - QuickEntrypointEnum entry) { - LocationSummary* locations = invoke->GetLocations(); - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - DCHECK_EQ(in, F12); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - DCHECK_EQ(out, F0); - - codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); -} - -static void GenFPFPToFPCall(HInvoke* invoke, - CodeGeneratorMIPS64* codegen, - QuickEntrypointEnum entry) { - LocationSummary* locations = invoke->GetLocations(); - FpuRegister in0 = locations->InAt(0).AsFpuRegister<FpuRegister>(); - DCHECK_EQ(in0, F12); - FpuRegister in1 
= locations->InAt(1).AsFpuRegister<FpuRegister>(); - DCHECK_EQ(in1, F13); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - DCHECK_EQ(out, F0); - - codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); -} - -// static double java.lang.Math.cos(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathCos(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCos); -} - -// static double java.lang.Math.sin(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathSin(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickSin); -} - -// static double java.lang.Math.acos(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAcos(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAcos); -} - -// static double java.lang.Math.asin(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAsin(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAsin); -} - -// static double java.lang.Math.atan(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAtan(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickAtan); -} - -// static double java.lang.Math.atan2(double y, double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); -} - -// static double java.lang.Math.pow(double y, double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickPow); -} - -// static double java.lang.Math.cbrt(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathCbrt(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCbrt); -} - -// static double java.lang.Math.cosh(double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathCosh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickCosh); -} - -// static double java.lang.Math.exp(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathExp(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickExp); -} - -// static double java.lang.Math.expm1(double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathExpm1(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickExpm1); -} - -// static double java.lang.Math.hypot(double x, double y) 
-void IntrinsicLocationsBuilderMIPS64::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathHypot(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickHypot); -} - -// static double java.lang.Math.log(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathLog(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickLog); -} - -// static double java.lang.Math.log10(double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathLog10(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickLog10); -} - -// static double java.lang.Math.nextAfter(double start, double direction) -void IntrinsicLocationsBuilderMIPS64::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathNextAfter(HInvoke* invoke) { - GenFPFPToFPCall(invoke, codegen_, kQuickNextAfter); -} - -// static double java.lang.Math.sinh(double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathSinh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickSinh); -} - -// static double java.lang.Math.tan(double a) -void IntrinsicLocationsBuilderMIPS64::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathTan(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickTan); -} - -// static double java.lang.Math.tanh(double x) -void IntrinsicLocationsBuilderMIPS64::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) { - GenFPToFPCall(invoke, codegen_, kQuickTanh); -} - -// long java.lang.Integer.valueOf(long) -void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { - InvokeRuntimeCallingConvention calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - calling_convention.GetReturnLocation(DataType::Type::kReference), - Location::RegisterLocation(calling_convention.GetRegisterAt(0))); -} - -void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); - LocationSummary* locations = invoke->GetLocations(); - Mips64Assembler* assembler = GetAssembler(); - InstructionCodeGeneratorMIPS64* icodegen = - down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); - - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - if (invoke->InputAt(0)->IsConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); - codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); - } else { - DCHECK(locations->CanCall()); - // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the - // JIT object table. 
- codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), - info.integer_boot_image_offset); - __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); - // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation - // one. - icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); - } - } else { - DCHECK(locations->CanCall()); - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - Mips64Label allocate, done; - - __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < info.length ? - __ LoadConst32(AT, info.length); - // Branch if out >= info.length . This means that "in" is outside of the valid range. - __ Bgeuc(out, AT, &allocate); - - // If the value is within the bounds, load the j.l.Integer directly from the array. - codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); - __ Dlsa(out, out, TMP, TIMES_4); - __ Lwu(out, out, 0); - __ MaybeUnpoisonHeapReference(out); - __ Bc(&done); - - __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. - codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), - info.integer_boot_image_offset); - __ StoreToOffset(kStoreWord, in, out, info.value_offset); - // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation - // one. - icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); - __ Bind(&done); - } -} - -// static boolean java.lang.Thread.interrupted() -void IntrinsicLocationsBuilderMIPS64::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetOut(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitThreadInterrupted(HInvoke* invoke) { - Mips64Assembler* assembler = GetAssembler(); - GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); - int32_t offset = Thread::InterruptedOffset<kMips64PointerSize>().Int32Value(); - __ LoadFromOffset(kLoadWord, out, TR, offset); - Mips64Label done; - __ Beqzc(out, &done); - __ Sync(0); - __ StoreToOffset(kStoreWord, ZERO, TR, offset); - __ Sync(0); - __ Bind(&done); -} - -void IntrinsicLocationsBuilderMIPS64::VisitReachabilityFence(HInvoke* invoke) { - LocationSummary* locations = - new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::Any()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } - -UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) -UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) -UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer) - -UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderAppend); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderToString); - -// 1.8. 
-UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject) - -UNREACHABLE_INTRINSICS(MIPS64) - -#undef __ - -} // namespace mips64 -} // namespace art diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h deleted file mode 100644 index ca8bc8f55a..0000000000 --- a/compiler/optimizing/intrinsics_mips64.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ -#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ - -#include "intrinsics.h" - -namespace art { - -class ArenaAllocator; -class HInvokeStaticOrDirect; -class HInvokeVirtual; - -namespace mips64 { - -class CodeGeneratorMIPS64; -class Mips64Assembler; - -class IntrinsicLocationsBuilderMIPS64 final : public IntrinsicVisitor { - public: - explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); - - // Define visitor methods. - -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - - // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether - // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to - // the invoke. - bool TryDispatch(HInvoke* invoke); - - private: - CodeGeneratorMIPS64* const codegen_; - ArenaAllocator* const allocator_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); -}; - -class IntrinsicCodeGeneratorMIPS64 final : public IntrinsicVisitor { - public: - explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} - - // Define visitor methods. - -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - - bool HasMsa() const; - - private: - Mips64Assembler* GetAssembler(); - - ArenaAllocator* GetAllocator(); - - CodeGeneratorMIPS64* const codegen_; - - DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64); -}; - -} // namespace mips64 -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index de697f0f96..6d7462e3c1 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3081,13 +3081,31 @@ UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) +UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) +UNIMPLEMENTED_INTRINSIC(X86, FP16Floor) +UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil) +UNIMPLEMENTED_INTRINSIC(X86, FP16Rint) +UNIMPLEMENTED_INTRINSIC(X86, FP16Greater) +UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(X86, FP16Less) +UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend); UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength); UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat); +UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble); UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength); UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index e79c0c9adf..0f6b00653d 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2748,13 +2748,31 @@ UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less) +UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend); UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength); UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject); 
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat); +UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble); UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength); UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index bfe7a4f72f..d725aba9c8 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -106,6 +106,8 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); + + EXPECT_TRUE(CheckGraph(graph_)); } TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { @@ -183,6 +185,8 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { ASSERT_TRUE(loc1 != loc2); // accesses to different fields of the same object should not alias. ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + EXPECT_TRUE(CheckGraph(graph_)); } TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { @@ -273,6 +277,8 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(graph_)); } TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index b33d0f488e..4c150dacea 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -23,8 +23,6 @@ #include "load_store_analysis.h" #include "side_effects_analysis.h" -#include <iostream> - /** * The general algorithm of load-store elimination (LSE). * Load-store analysis in the previous pass collects a list of heap locations @@ -64,8 +62,9 @@ * all the heap values, depending on the instruction's side effects. * - Finalizable objects are considered as persisting at method * return/deoptimization. - * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad - * and VecStore instructions. + * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any + * partial overlap access among ArrayGet/ArraySet/VecLoad/Store is seen as + * alias and no load/store is eliminated in such case. * - Currently this LSE algorithm doesn't handle graph with try-catch, due to * the special block merging structure. 
*/ @@ -172,9 +171,7 @@ class LSEVisitor : public HGraphDelegateVisitor { DCHECK(substitute2->IsTypeConversion()); continue; } - DCHECK(load2->IsInstanceFieldGet() || - load2->IsStaticFieldGet() || - load2->IsArrayGet()); + DCHECK(IsLoad(load2)); DCHECK(substitute2 != nullptr); if (substitute2 == substitute && load2->GetType() == load->GetType() && @@ -204,9 +201,7 @@ class LSEVisitor : public HGraphDelegateVisitor { DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion()); continue; } - DCHECK(load->IsInstanceFieldGet() || - load->IsStaticFieldGet() || - load->IsArrayGet()); + DCHECK(IsLoad(load)); HInstruction* substitute = substitute_instructions_for_loads_[i]; DCHECK(substitute != nullptr); // We proactively retrieve the substitute for a removed load, so @@ -224,7 +219,7 @@ class LSEVisitor : public HGraphDelegateVisitor { // We guarantee that type A stored as type B and then fetched out as // type C is the same as casting from type A to type C directly, since // type B and type C will have the same size which is guarenteed in - // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType(). + // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType(). // So we only need one type conversion from type A to type C. HTypeConversion* type_conversion = AddTypeConversionIfNecessary( load, substitute, load->GetType()); @@ -240,7 +235,7 @@ class LSEVisitor : public HGraphDelegateVisitor { // At this point, stores in possibly_removed_stores_ can be safely removed. for (HInstruction* store : possibly_removed_stores_) { - DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet()); + DCHECK(IsStore(store)); store->GetBlock()->RemoveInstruction(store); } @@ -261,26 +256,37 @@ class LSEVisitor : public HGraphDelegateVisitor { } private: - static bool IsLoad(HInstruction* instruction) { + static bool IsLoad(const HInstruction* instruction) { if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { return false; } // Unresolved load is not treated as a load. return instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet() || + instruction->IsVecLoad() || instruction->IsArrayGet(); } - static bool IsStore(HInstruction* instruction) { + static bool IsStore(const HInstruction* instruction) { if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { return false; } // Unresolved store is not treated as a store. return instruction->IsInstanceFieldSet() || instruction->IsArraySet() || + instruction->IsVecStore() || instruction->IsStaticFieldSet(); } + // Check if it is allowed to use default values for the specified load. + static bool IsDefaultAllowedForLoad(const HInstruction* load) { + DCHECK(IsLoad(load)); + // Using defaults for VecLoads requires to create additional vector operations. + // As there are some issues with scheduling vector operations it is better to avoid creating + // them. + return !load->IsVecOperation(); + } + // Returns the real heap value by finding its substitute or by "peeling" // a store instruction. 
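To make the new IsDefaultAllowedForLoad() restriction concrete: substituting a default for a scalar load only needs a constant that already exists in the graph, whereas substituting one for a HVecLoad would force the pass to materialize a fresh zero vector first. A minimal sketch of the substitution that is being avoided, reusing the HVecReplicateScalar constructor that also appears in the new tests further down (allocator, block and vec_load are placeholder names, not part of the patch):

    // Hypothetical replacement of a HVecLoad from a known-default location:
    HInstruction* zero = graph_->GetIntConstant(0);
    HInstruction* zero_vector = new (allocator) HVecReplicateScalar(
        allocator, zero, DataType::Type::kInt32, /* vector_length= */ 4, kNoDexPc);
    block->InsertInstructionBefore(zero_vector, vec_load);  // extra vector op created only for LSE
    // vec_load could then be replaced by zero_vector and removed.

Since creating and scheduling such extra vector operations is exactly what the comment above wants to avoid, VisitGetLocation() instead downgrades the location to kUnknownHeapValue for vector loads.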
HInstruction* GetRealHeapValue(HInstruction* heap_value) { @@ -298,6 +304,8 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_value = heap_value->AsInstanceFieldSet()->GetValue(); } else if (heap_value->IsStaticFieldSet()) { heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else if (heap_value->IsVecStore()) { + heap_value = heap_value->AsVecStore()->GetValue(); } else { DCHECK(heap_value->IsArraySet()); heap_value = heap_value->AsArraySet()->GetValue(); @@ -553,10 +561,15 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; if (heap_value == kDefaultHeapValue) { - HInstruction* constant = GetDefaultValue(instruction->GetType()); - AddRemovedLoad(instruction, constant); - heap_values[idx] = constant; - return; + if (IsDefaultAllowedForLoad(instruction)) { + HInstruction* constant = GetDefaultValue(instruction->GetType()); + AddRemovedLoad(instruction, constant); + heap_values[idx] = constant; + return; + } else { + heap_values[idx] = kUnknownHeapValue; + heap_value = kUnknownHeapValue; + } } heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { @@ -590,6 +603,35 @@ class LSEVisitor : public HGraphDelegateVisitor { return false; } + bool CanValueBeKeptIfSameAsNew(HInstruction* value, + HInstruction* new_value, + HInstruction* new_value_set_instr) { + // For field/array set location operations, if the value is the same as the new_value + // it can be kept even if aliasing happens. All aliased operations will access the same memory + // range. + // For vector values, this is not true. For example: + // packed_data = [0xA, 0xB, 0xC, 0xD]; <-- Different values in each lane. + // VecStore array[i ,i+1,i+2,i+3] = packed_data; + // VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap). + // VecLoad vx = array[i,i+1,i+2,i+3]; <-- Cannot be eliminated because the value + // here is not packed_data anymore. + // + // TODO: to allow such 'same value' optimization on vector data, + // LSA needs to report more fine-grain MAY alias information: + // (1) May alias due to two vector data partial overlap. + // e.g. a[i..i+3] and a[i+1,..,i+4]. + // (2) May alias due to two vector data may complete overlap each other. + // e.g. a[i..i+3] and b[i..i+3]. + // (3) May alias but the exact relationship between two locations is unknown. + // e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown. + // This 'same value' optimization can apply only on case (2). + if (new_value_set_instr->IsVecOperation()) { + return false; + } + + return Equal(value, new_value); + } + void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) { DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); DCHECK(!IsStore(value)) << value->DebugName(); @@ -636,23 +678,16 @@ class LSEVisitor : public HGraphDelegateVisitor { // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { - if (i == idx) { - continue; - } - if (Equal(heap_values[i], value)) { - // Same value should be kept even if aliasing happens. + if (i == idx || + heap_values[i] == kUnknownHeapValue || + CanValueBeKeptIfSameAsNew(heap_values[i], value, instruction) || + !heap_location_collector_.MayAlias(i, idx)) { continue; } - if (heap_values[i] == kUnknownHeapValue) { - // Value is already unknown, no need for aliasing check. 
- continue; - } - if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias and as a result if the heap value - // is a store, the store needs to be kept. - KeepIfIsStore(heap_values[i]); - heap_values[i] = kUnknownHeapValue; - } + // Kill heap locations that may alias and as a result if the heap value + // is a store, the store needs to be kept. + KeepIfIsStore(heap_values[i]); + heap_values[i] = kUnknownHeapValue; } } @@ -689,7 +724,16 @@ class LSEVisitor : public HGraphDelegateVisitor { void VisitArraySet(HArraySet* instruction) override { size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); - VisitSetLocation(instruction, idx, instruction->InputAt(2)); + VisitSetLocation(instruction, idx, instruction->GetValue()); + } + + void VisitVecLoad(HVecLoad* instruction) override { + VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); + } + + void VisitVecStore(HVecStore* instruction) override { + size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); + VisitSetLocation(instruction, idx, instruction->GetValue()); } void VisitDeoptimize(HDeoptimize* instruction) override { @@ -892,11 +936,6 @@ bool LoadStoreElimination::Run() { return false; } - // TODO: analyze VecLoad/VecStore better. - if (graph_->HasSIMD()) { - return false; - } - LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc new file mode 100644 index 0000000000..738037803e --- /dev/null +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -0,0 +1,893 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <tuple> + +#include "load_store_analysis.h" +#include "load_store_elimination.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +#include "gtest/gtest.h" + +namespace art { + +class LoadStoreEliminationTest : public ImprovedOptimizingUnitTest { + public: + void PerformLSE() { + graph_->BuildDominatorTree(); + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + LoadStoreAnalysis lsa(graph_); + lsa.Run(); + LoadStoreElimination lse(graph_, side_effects, lsa, nullptr); + lse.Run(); + EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks()); + } + + // Create instructions shared among tests. 
+  void CreateEntryBlockInstructions() {
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c4 = graph_->GetIntConstant(4);
+    i_add1_ = new (GetAllocator()) HAdd(DataType::Type::kInt32, i_, c1);
+    i_add4_ = new (GetAllocator()) HAdd(DataType::Type::kInt32, i_, c4);
+    entry_block_->AddInstruction(i_add1_);
+    entry_block_->AddInstruction(i_add4_);
+    entry_block_->AddInstruction(new (GetAllocator()) HGoto());
+  }
+
+  // Create the major CFG used by tests:
+  //    entry
+  //      |
+  //  pre_header
+  //      |
+  //    loop[]
+  //      |
+  //    return
+  //      |
+  //     exit
+  void CreateTestControlFlowGraph() {
+    pre_header_ = new (GetAllocator()) HBasicBlock(graph_);
+    loop_ = new (GetAllocator()) HBasicBlock(graph_);
+
+    graph_->AddBlock(pre_header_);
+    graph_->AddBlock(loop_);
+
+    entry_block_->ReplaceSuccessor(return_block_, pre_header_);
+    pre_header_->AddSuccessor(loop_);
+    loop_->AddSuccessor(loop_);
+    loop_->AddSuccessor(return_block_);
+
+    HInstruction* c0 = graph_->GetIntConstant(0);
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c128 = graph_->GetIntConstant(128);
+
+    CreateEntryBlockInstructions();
+
+    // pre_header block
+    //   phi = 0;
+    phi_ = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
+    loop_->AddPhi(phi_);
+    pre_header_->AddInstruction(new (GetAllocator()) HGoto());
+    phi_->AddInput(c0);
+
+    // loop block:
+    //   suspend_check
+    //   phi++;
+    //   if (phi >= 128)
+    suspend_check_ = new (GetAllocator()) HSuspendCheck();
+    HInstruction* inc_phi = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_, c1);
+    HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(phi_, c128);
+    HInstruction* hif = new (GetAllocator()) HIf(cmp);
+    loop_->AddInstruction(suspend_check_);
+    loop_->AddInstruction(inc_phi);
+    loop_->AddInstruction(cmp);
+    loop_->AddInstruction(hif);
+    phi_->AddInput(inc_phi);
+
+    CreateEnvForSuspendCheck();
+  }
+
+  void CreateEnvForSuspendCheck() {
+    ArenaVector<HInstruction*> current_locals({array_, i_, j_},
+                                              GetAllocator()->Adapter(kArenaAllocInstruction));
+    ManuallyBuildEnvFor(suspend_check_, &current_locals);
+  }
+
+  // Create the diamond-shaped CFG:
+  //      upper
+  //      /   \
+  //   left   right
+  //      \   /
+  //      down
+  //
+  // Return: the basic blocks forming the CFG in the following order {upper, left, right, down}.
+  std::tuple<HBasicBlock*, HBasicBlock*, HBasicBlock*, HBasicBlock*> CreateDiamondShapedCFG() {
+    CreateEntryBlockInstructions();
+
+    HBasicBlock* upper = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* left = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* right = new (GetAllocator()) HBasicBlock(graph_);
+
+    graph_->AddBlock(upper);
+    graph_->AddBlock(left);
+    graph_->AddBlock(right);
+
+    entry_block_->ReplaceSuccessor(return_block_, upper);
+    upper->AddSuccessor(left);
+    upper->AddSuccessor(right);
+    left->AddSuccessor(return_block_);
+    right->AddSuccessor(return_block_);
+
+    HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(i_, j_);
+    HInstruction* hif = new (GetAllocator()) HIf(cmp);
+    upper->AddInstruction(cmp);
+    upper->AddInstruction(hif);
+
+    left->AddInstruction(new (GetAllocator()) HGoto());
+    right->AddInstruction(new (GetAllocator()) HGoto());
+
+    return std::make_tuple(upper, left, right, return_block_);
+  }
+
+  // Add a HVecLoad instruction to the end of the provided basic block.
+  //
+  // Return: the created HVecLoad instruction.
+ HInstruction* AddVecLoad(HBasicBlock* block, HInstruction* array, HInstruction* index) { + DCHECK(block != nullptr); + DCHECK(array != nullptr); + DCHECK(index != nullptr); + HInstruction* vload = new (GetAllocator()) HVecLoad( + GetAllocator(), + array, + index, + DataType::Type::kInt32, + SideEffects::ArrayReadOfType(DataType::Type::kInt32), + 4, + /*is_string_char_at*/ false, + kNoDexPc); + block->InsertInstructionBefore(vload, block->GetLastInstruction()); + return vload; + } + + // Add a HVecStore instruction to the end of the provided basic block. + // If no vdata is specified, generate HVecStore: array[index] = [1,1,1,1]. + // + // Return: the created HVecStore instruction. + HInstruction* AddVecStore(HBasicBlock* block, + HInstruction* array, + HInstruction* index, + HInstruction* vdata = nullptr) { + DCHECK(block != nullptr); + DCHECK(array != nullptr); + DCHECK(index != nullptr); + if (vdata == nullptr) { + HInstruction* c1 = graph_->GetIntConstant(1); + vdata = new (GetAllocator()) HVecReplicateScalar(GetAllocator(), + c1, + DataType::Type::kInt32, + 4, + kNoDexPc); + block->InsertInstructionBefore(vdata, block->GetLastInstruction()); + } + HInstruction* vstore = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + index, + vdata, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + block->InsertInstructionBefore(vstore, block->GetLastInstruction()); + return vstore; + } + + // Add a HArrayGet instruction to the end of the provided basic block. + // + // Return: the created HArrayGet instruction. + HInstruction* AddArrayGet(HBasicBlock* block, HInstruction* array, HInstruction* index) { + DCHECK(block != nullptr); + DCHECK(array != nullptr); + DCHECK(index != nullptr); + HInstruction* get = new (GetAllocator()) HArrayGet(array, index, DataType::Type::kInt32, 0); + block->InsertInstructionBefore(get, block->GetLastInstruction()); + return get; + } + + // Add a HArraySet instruction to the end of the provided basic block. + // If no data is specified, generate HArraySet: array[index] = 1. + // + // Return: the created HArraySet instruction. 
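Together with AddArraySet (defined next), these helpers let a test express a whole store/load scenario in a few lines. A minimal sketch of the pattern the test cases below build on (hypothetical test name; fixture members as declared in this file):

    TEST_F(LoadStoreEliminationTest, ForwardStoreToLoadSketch) {
      InitGraph();
      CreateTestControlFlowGraph();
      HInstruction* c1 = graph_->GetIntConstant(1);
      AddArraySet(entry_block_, array_, i_, c1);                   // a[i] = 1
      HInstruction* load = AddArrayGet(entry_block_, array_, i_);  // x = a[i]
      PerformLSE();
      ASSERT_TRUE(IsRemoved(load));  // the load is forwarded from the preceding store
    }

The full tests that follow exercise many variations of this pattern, including vector loads/stores and partially overlapping accesses.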
+ HInstruction* AddArraySet(HBasicBlock* block, + HInstruction* array, + HInstruction* index, + HInstruction* data = nullptr) { + DCHECK(block != nullptr); + DCHECK(array != nullptr); + DCHECK(index != nullptr); + if (data == nullptr) { + data = graph_->GetIntConstant(1); + } + HInstruction* store = new (GetAllocator()) HArraySet(array, + index, + data, + DataType::Type::kInt32, + 0); + block->InsertInstructionBefore(store, block->GetLastInstruction()); + return store; + } + + void CreateParameters() override { + parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32)); + array_ = parameters_.back(); + parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 1, + DataType::Type::kInt32)); + i_ = parameters_.back(); + parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 2, + DataType::Type::kInt32)); + j_ = parameters_.back(); + } + + HBasicBlock* pre_header_; + HBasicBlock* loop_; + + HInstruction* array_; + HInstruction* i_; + HInstruction* j_; + HInstruction* i_add1_; + HInstruction* i_add4_; + HInstruction* suspend_check_; + + HPhi* phi_; +}; + +TEST_F(LoadStoreEliminationTest, ArrayGetSetElimination) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + + // array[1] = 1; + // x = array[1]; <--- Remove. + // y = array[2]; + // array[1] = 1; <--- Remove, since it stores same value. + // array[i] = 3; <--- MAY alias. + // array[1] = 1; <--- Cannot remove, even if it stores the same value. + AddArraySet(entry_block_, array_, c1, c1); + HInstruction* load1 = AddArrayGet(entry_block_, array_, c1); + HInstruction* load2 = AddArrayGet(entry_block_, array_, c2); + HInstruction* store1 = AddArraySet(entry_block_, array_, c1, c1); + AddArraySet(entry_block_, array_, i_, c3); + HInstruction* store2 = AddArraySet(entry_block_, array_, c1, c1); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(load1)); + ASSERT_FALSE(IsRemoved(load2)); + ASSERT_TRUE(IsRemoved(store1)); + ASSERT_FALSE(IsRemoved(store2)); +} + +TEST_F(LoadStoreEliminationTest, SameHeapValue1) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + + // Test LSE handling same value stores on array. + // array[1] = 1; + // array[2] = 1; + // array[1] = 1; <--- Can remove. + // array[1] = 2; <--- Can NOT remove. + AddArraySet(entry_block_, array_, c1, c1); + AddArraySet(entry_block_, array_, c2, c1); + HInstruction* store1 = AddArraySet(entry_block_, array_, c1, c1); + HInstruction* store2 = AddArraySet(entry_block_, array_, c1, c2); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(store1)); + ASSERT_FALSE(IsRemoved(store2)); +} + +TEST_F(LoadStoreEliminationTest, SameHeapValue2) { + InitGraph(); + CreateTestControlFlowGraph(); + + // Test LSE handling same value stores on vector. + // vdata = [0x1, 0x2, 0x3, 0x4, ...] + // VecStore array[i...] = vdata; + // VecStore array[j...] = vdata; <--- MAY ALIAS. + // VecStore array[i...] = vdata; <--- Cannot Remove, even if it's same value. 
+ AddVecStore(entry_block_, array_, i_); + AddVecStore(entry_block_, array_, j_); + HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vstore)); +} + +TEST_F(LoadStoreEliminationTest, SameHeapValue3) { + InitGraph(); + CreateTestControlFlowGraph(); + + // VecStore array[i...] = vdata; + // VecStore array[i+1...] = vdata; <--- MAY alias due to partial overlap. + // VecStore array[i...] = vdata; <--- Cannot remove, even if it's same value. + AddVecStore(entry_block_, array_, i_); + AddVecStore(entry_block_, array_, i_add1_); + HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vstore)); +} + +TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c1 = graph_->GetIntConstant(1); + + // Test LSE handling array LSE when there is vector store in between. + // a[i] = 1; + // .. = a[i]; <-- Remove. + // a[i,i+1,i+2,i+3] = data; <-- PARTIAL OVERLAP ! + // .. = a[i]; <-- Cannot remove. + AddArraySet(entry_block_, array_, i_, c1); + HInstruction* load1 = AddArrayGet(entry_block_, array_, i_); + AddVecStore(entry_block_, array_, i_); + HInstruction* load2 = AddArrayGet(entry_block_, array_, i_); + + // Test LSE handling vector load/store partial overlap. + // a[i,i+1,i+2,i+3] = data; + // a[i+4,i+5,i+6,i+7] = data; + // .. = a[i,i+1,i+2,i+3]; + // .. = a[i+4,i+5,i+6,i+7]; + // a[i+1,i+2,i+3,i+4] = data; <-- PARTIAL OVERLAP ! + // .. = a[i,i+1,i+2,i+3]; + // .. = a[i+4,i+5,i+6,i+7]; + AddVecStore(entry_block_, array_, i_); + AddVecStore(entry_block_, array_, i_add4_); + HInstruction* vload1 = AddVecLoad(entry_block_, array_, i_); + HInstruction* vload2 = AddVecLoad(entry_block_, array_, i_add4_); + AddVecStore(entry_block_, array_, i_add1_); + HInstruction* vload3 = AddVecLoad(entry_block_, array_, i_); + HInstruction* vload4 = AddVecLoad(entry_block_, array_, i_add4_); + + // Test LSE handling vector LSE when there is array store in between. + // a[i,i+1,i+2,i+3] = data; + // a[i+1] = 1; <-- PARTIAL OVERLAP ! + // .. = a[i,i+1,i+2,i+3]; + AddVecStore(entry_block_, array_, i_); + AddArraySet(entry_block_, array_, i_, c1); + HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(load1)); + ASSERT_FALSE(IsRemoved(load2)); + + ASSERT_TRUE(IsRemoved(vload1)); + ASSERT_TRUE(IsRemoved(vload2)); + ASSERT_FALSE(IsRemoved(vload3)); + ASSERT_FALSE(IsRemoved(vload4)); + + ASSERT_FALSE(IsRemoved(vload5)); +} +// function (int[] a, int j) { +// a[j] = 1; +// for (int i=0; i<128; i++) { +// /* doesn't do any write */ +// } +// a[j] = 1; +TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c1 = graph_->GetIntConstant(1); + + // a[j] = 1 + AddArraySet(pre_header_, array_, j_, c1); + + // LOOP BODY: + // .. 
= a[i,i+1,i+2,i+3]; + AddVecLoad(loop_, array_, phi_); + + // a[j] = 1; + HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(array_set)); +} + +// function (int[] a, int j) { +// int[] b = new int[128]; +// a[j] = 0; +// for (int phi=0; phi<128; phi++) { +// a[phi,phi+1,phi+2,phi+3] = [1,1,1,1]; +// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; +// } +// a[j] = 0; +// } +TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction()); + array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // a[j] = 0; + AddArraySet(pre_header_, array_, j_, c0); + + // LOOP BODY: + // a[phi,phi+1,phi+2,phi+3] = [1,1,1,1]; + // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; + AddVecStore(loop_, array_, phi_); + HInstruction* vload = AddVecLoad(loop_, array_, phi_); + AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + + // a[j] = 0; + HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(vload)); + ASSERT_FALSE(IsRemoved(a_set)); // Cannot remove due to write side-effect in the loop. +} + +// function (int[] a, int j) { +// int[] b = new int[128]; +// a[j] = 0; +// for (int phi=0; phi<128; phi++) { +// a[phi,phi+1,phi+2,phi+3] = [1,1,1,1]; +// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; +// } +// x = a[j]; +// } +TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction()); + array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // a[j] = 0; + AddArraySet(pre_header_, array_, j_, c0); + + // LOOP BODY: + // a[phi,phi+1,phi+2,phi+3] = [1,1,1,1]; + // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; + AddVecStore(loop_, array_, phi_); + HInstruction* vload = AddVecLoad(loop_, array_, phi_); + AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + + // x = a[j]; + HInstruction* load = AddArrayGet(return_block_, array_, j_); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(vload)); + ASSERT_FALSE(IsRemoved(load)); // Cannot remove due to write side-effect in the loop. +} + +// Check that merging works correctly when there are VecStors in predecessors. +// +// vstore1: a[i,... i + 3] = [1,...1] +// / \ +// / \ +// vstore2: a[i,... i + 3] = [1,...1] vstore3: a[i+1, ... i + 4] = [1, ... 1] +// \ / +// \ / +// vstore4: a[i,... i + 3] = [1,...1] +// +// Expected: +// 'vstore2' is removed. +// 'vstore3' is not removed. +// 'vstore4' is not removed. Such cases are not supported at the moment. +TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { + InitGraph(); + + HBasicBlock* upper; + HBasicBlock* left; + HBasicBlock* right; + HBasicBlock* down; + std::tie(upper, left, right, down) = CreateDiamondShapedCFG(); + + // upper: a[i,... i + 3] = [1,...1] + HInstruction* vstore1 = AddVecStore(upper, array_, i_); + HInstruction* vdata = vstore1->InputAt(2); + + // left: a[i,... 
i + 3] = [1,...1] + HInstruction* vstore2 = AddVecStore(left, array_, i_, vdata); + + // right: a[i+1, ... i + 4] = [1, ... 1] + HInstruction* vstore3 = AddVecStore(right, array_, i_add1_, vdata); + + // down: a[i,... i + 3] = [1,...1] + HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(vstore2)); + ASSERT_FALSE(IsRemoved(vstore3)); + ASSERT_FALSE(IsRemoved(vstore4)); +} + +// Check that merging works correctly when there are ArraySets in predecessors. +// +// a[i] = 1 +// / \ +// / \ +// store1: a[i] = 1 store2: a[i+1] = 1 +// \ / +// \ / +// store3: a[i] = 1 +// +// Expected: +// 'store1' is removed. +// 'store2' is not removed. +// 'store3' is removed. +TEST_F(LoadStoreEliminationTest, MergePredecessorStores) { + InitGraph(); + + HBasicBlock* upper; + HBasicBlock* left; + HBasicBlock* right; + HBasicBlock* down; + std::tie(upper, left, right, down) = CreateDiamondShapedCFG(); + + // upper: a[i,... i + 3] = [1,...1] + AddArraySet(upper, array_, i_); + + // left: a[i,... i + 3] = [1,...1] + HInstruction* store1 = AddArraySet(left, array_, i_); + + // right: a[i+1, ... i + 4] = [1, ... 1] + HInstruction* store2 = AddArraySet(right, array_, i_add1_); + + // down: a[i,... i + 3] = [1,...1] + HInstruction* store3 = AddArraySet(down, array_, i_); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(store1)); + ASSERT_FALSE(IsRemoved(store2)); + ASSERT_TRUE(IsRemoved(store3)); +} + +// Check that redundant VStore/VLoad are removed from a SIMD loop. +// +// LOOP BODY +// vstore1: a[i,... i + 3] = [1,...1] +// vload: x = a[i,... i + 3] +// vstore2: b[i,... i + 3] = x +// vstore3: a[i,... i + 3] = [1,...1] +// +// Expected: +// 'vstore1' is not removed. +// 'vload' is removed. +// 'vstore3' is removed. +TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction()); + array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // LOOP BODY: + // a[i,... i + 3] = [1,...1] + // x = a[i,... i + 3] + // b[i,... i + 3] = x + // a[i,... i + 3] = [1,...1] + HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_); + HInstruction* vload = AddVecLoad(loop_, array_a, phi_); + AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vstore1)); + ASSERT_TRUE(IsRemoved(vload)); + ASSERT_TRUE(IsRemoved(vstore3)); +} + +// Loop write side effects invalidate all stores. +// This causes stores after such loops not to be removed, even +// their values are known. 
+TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c128 = graph_->GetIntConstant(128); + + // array[0] = 2; + // loop: + // b[i] = array[i] + // array[0] = 2 + AddArraySet(entry_block_, array_, c0, c2); + + HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction()); + array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + HInstruction* load = AddArrayGet(loop_, array_, phi_); + AddArraySet(loop_, array_b, phi_, load); + + HInstruction* store = AddArraySet(return_block_, array_, c0, c2); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(store)); +} + +// As it is not allowed to use defaults for VecLoads, check if there is a new created array +// a VecLoad used in a loop and after it is not replaced with a default. +TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // LOOP BODY: + // v = a[i,... i + 3] + // array[0,... 3] = v + HInstruction* vload = AddVecLoad(loop_, array_a, phi_); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload)); + ASSERT_FALSE(IsRemoved(vstore)); +} + +// As it is not allowed to use defaults for VecLoads, check if there is a new created array +// a VecLoad is not replaced with a default. +TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // v = a[0,... 3] + // array[0,... 3] = v + HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload)); + ASSERT_FALSE(IsRemoved(vstore)); +} + +// As it is allowed to use defaults for ordinary loads, check if there is a new created array +// a load used in a loop and after it is replaced with a default. 
+TEST_F(LoadStoreEliminationTest, LoadDefaultValueInLoopWithoutWriteSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // LOOP BODY: + // v = a[i] + // array[0] = v + HInstruction* load = AddArrayGet(loop_, array_a, phi_); + HInstruction* store = AddArraySet(return_block_, array_, c0, load); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(load)); + ASSERT_FALSE(IsRemoved(store)); +} + +// As it is allowed to use defaults for ordinary loads, check if there is a new created array +// a load is replaced with a default. +TEST_F(LoadStoreEliminationTest, LoadDefaultValue) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // v = a[0] + // array[0] = v + HInstruction* load = AddArrayGet(pre_header_, array_a, c0); + HInstruction* store = AddArraySet(return_block_, array_, c0, load); + + PerformLSE(); + + ASSERT_TRUE(IsRemoved(load)); + ASSERT_FALSE(IsRemoved(store)); +} + +// As it is not allowed to use defaults for VecLoads but allowed for regular loads, +// check if there is a new created array, a VecLoad and a load used in a loop and after it, +// VecLoad is not replaced with a default but the load is. +TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // LOOP BODY: + // v = a[i,... i + 3] + // v1 = a[i] + // array[0,... 3] = v + // array[0] = v1 + HInstruction* vload = AddVecLoad(loop_, array_a, phi_); + HInstruction* load = AddArrayGet(loop_, array_a, phi_); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* store = AddArraySet(return_block_, array_, c0, load); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload)); + ASSERT_TRUE(IsRemoved(load)); + ASSERT_FALSE(IsRemoved(vstore)); + ASSERT_FALSE(IsRemoved(store)); +} + +// As it is not allowed to use defaults for VecLoads but allowed for regular loads, +// check if there is a new created array, a VecLoad and a load, +// VecLoad is not replaced with a default but the load is. +TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // v = a[0,... 3] + // v1 = a[0] + // array[0,... 
3] = v + // array[0] = v1 + HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); + HInstruction* load = AddArrayGet(pre_header_, array_a, c0); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* store = AddArraySet(return_block_, array_, c0, load); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload)); + ASSERT_TRUE(IsRemoved(load)); + ASSERT_FALSE(IsRemoved(vstore)); + ASSERT_FALSE(IsRemoved(store)); +} + +// It is not allowed to use defaults for VecLoads. However it should not prevent from removing +// loads getting the same value. +// Check a load getting a known value is eliminated (a loop test case). +TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSideEffects) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // LOOP BODY: + // v = a[i,... i + 3] + // v1 = a[i,... i + 3] + // array[0,... 3] = v + // array[128,... 131] = v1 + HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_); + HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload1)); + ASSERT_TRUE(IsRemoved(vload2)); + ASSERT_FALSE(IsRemoved(vstore1)); + ASSERT_FALSE(IsRemoved(vstore2)); +} + +// It is not allowed to use defaults for VecLoads. However it should not prevent from removing +// loads getting the same value. +// Check a load getting a known value is eliminated. +TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { + InitGraph(); + CreateTestControlFlowGraph(); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c128 = graph_->GetIntConstant(128); + + HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0); + pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction()); + array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment()); + + // v = a[0,... 3] + // v1 = a[0,... 3] + // array[0,... 3] = v + // array[128,... 131] = v1 + HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0); + HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + + PerformLSE(); + + ASSERT_FALSE(IsRemoved(vload1)); + ASSERT_TRUE(IsRemoved(vload2)); + ASSERT_FALSE(IsRemoved(vstore1)); + ASSERT_FALSE(IsRemoved(vstore2)); +} + +} // namespace art diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 2ae3683ffa..78505171cb 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -178,12 +178,232 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper { } }; +// Custom implementation of loop helper for X86_64 target. Enables heuristics for scalar loop +// peeling and unrolling and supports SIMD loop unrolling. 
+class X86_64LoopHelper : public ArchDefaultLoopHelper { + // mapping of machine instruction count for most used IR instructions + // Few IRs generate different number of instructions based on input and result type. + // We checked top java apps, benchmarks and used the most generated instruction count. + uint32_t GetMachineInstructionCount(HInstruction* inst) const { + switch (inst->GetKind()) { + case HInstruction::InstructionKind::kAbs: + return 3; + case HInstruction::InstructionKind::kAdd: + return 1; + case HInstruction::InstructionKind::kAnd: + return 1; + case HInstruction::InstructionKind::kArrayLength: + return 1; + case HInstruction::InstructionKind::kArrayGet: + return 1; + case HInstruction::InstructionKind::kArraySet: + return 1; + case HInstruction::InstructionKind::kBoundsCheck: + return 2; + case HInstruction::InstructionKind::kCheckCast: + return 9; + case HInstruction::InstructionKind::kDiv: + return 8; + case HInstruction::InstructionKind::kDivZeroCheck: + return 2; + case HInstruction::InstructionKind::kEqual: + return 3; + case HInstruction::InstructionKind::kGreaterThan: + return 3; + case HInstruction::InstructionKind::kGreaterThanOrEqual: + return 3; + case HInstruction::InstructionKind::kIf: + return 2; + case HInstruction::InstructionKind::kInstanceFieldGet: + return 2; + case HInstruction::InstructionKind::kInstanceFieldSet: + return 1; + case HInstruction::InstructionKind::kLessThan: + return 3; + case HInstruction::InstructionKind::kLessThanOrEqual: + return 3; + case HInstruction::InstructionKind::kMax: + return 2; + case HInstruction::InstructionKind::kMin: + return 2; + case HInstruction::InstructionKind::kMul: + return 1; + case HInstruction::InstructionKind::kNotEqual: + return 3; + case HInstruction::InstructionKind::kOr: + return 1; + case HInstruction::InstructionKind::kRem: + return 11; + case HInstruction::InstructionKind::kSelect: + return 2; + case HInstruction::InstructionKind::kShl: + return 1; + case HInstruction::InstructionKind::kShr: + return 1; + case HInstruction::InstructionKind::kSub: + return 1; + case HInstruction::InstructionKind::kTypeConversion: + return 1; + case HInstruction::InstructionKind::kUShr: + return 1; + case HInstruction::InstructionKind::kVecReplicateScalar: + return 2; + case HInstruction::InstructionKind::kVecExtractScalar: + return 1; + case HInstruction::InstructionKind::kVecReduce: + return 4; + case HInstruction::InstructionKind::kVecNeg: + return 2; + case HInstruction::InstructionKind::kVecAbs: + return 4; + case HInstruction::InstructionKind::kVecNot: + return 3; + case HInstruction::InstructionKind::kVecAdd: + return 1; + case HInstruction::InstructionKind::kVecSub: + return 1; + case HInstruction::InstructionKind::kVecMul: + return 1; + case HInstruction::InstructionKind::kVecDiv: + return 1; + case HInstruction::InstructionKind::kVecMax: + return 1; + case HInstruction::InstructionKind::kVecMin: + return 1; + case HInstruction::InstructionKind::kVecOr: + return 1; + case HInstruction::InstructionKind::kVecXor: + return 1; + case HInstruction::InstructionKind::kVecShl: + return 1; + case HInstruction::InstructionKind::kVecShr: + return 1; + case HInstruction::InstructionKind::kVecLoad: + return 1; + case HInstruction::InstructionKind::kVecStore: + return 1; + case HInstruction::InstructionKind::kXor: + return 1; + default: + return 1; + } + } + + // Maximum possible unrolling factor. 
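These per-instruction weights are rough x86-64 estimates; GetUnrollingFactor() (below) sums them over the loop header and body to approximate how many machine instructions the unrolled loop would occupy in the loop stream decoder. A small worked example with an assumed IR shape (the real counts depend on what earlier passes left in the loop):

    // Hypothetical loop body: a[i] = a[i] + b[i];
    //   2 x ArrayGet (1 each) + Add (1) + ArraySet (1)   -> estimated 4 instructions
    //   2 x BoundsCheck, if not eliminated (2 each)      -> estimated 4 instructions
    // Loop header test: LessThan (3) + If (2)            -> estimated 5 instructions
    // SuspendCheck and Goto are skipped by the counting loop below.

The constants that follow, starting with the maximum factor announced in the comment above, then cap how aggressively such a body may be unrolled.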
+ static constexpr uint32_t kX86_64MaxUnrollFactor = 2; // pow(2,2) = 4 + + // According to Intel® 64 and IA-32 Architectures Optimization Reference Manual, + // avoid excessive loop unrolling to ensure LSD (loop stream decoder) is operating efficiently. + // This variable takes care that unrolled loop instructions should not exceed LSD size. + // For Intel Atom processors (silvermont & goldmont), LSD size is 28 + // TODO - identify architecture and LSD size at runtime + static constexpr uint32_t kX86_64UnrolledMaxBodySizeInstr = 28; + + // Loop's maximum basic block count. Loops with higher count will not be partial + // unrolled (unknown iterations). + static constexpr uint32_t kX86_64UnknownIterMaxBodySizeBlocks = 2; + + uint32_t GetUnrollingFactor(HLoopInformation* loop_info, HBasicBlock* header) const; + + public: + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, + int64_t trip_count, + uint32_t max_peel, + uint32_t vector_length) const override { + DCHECK_NE(vector_length, 0u); + HLoopInformation* loop_info = block->GetLoopInformation(); + DCHECK(loop_info); + HBasicBlock* header = loop_info->GetHeader(); + DCHECK(header); + uint32_t unroll_factor = 0; + + if ((trip_count == 0) || (trip_count == LoopAnalysisInfo::kUnknownTripCount)) { + // Don't unroll for large loop body size. + unroll_factor = GetUnrollingFactor(loop_info, header); + if (unroll_factor <= 1) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + } else { + // Don't unroll with insufficient iterations. + if (trip_count < (2 * vector_length + max_peel)) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + // Don't unroll for large loop body size. + uint32_t unroll_cnt = GetUnrollingFactor(loop_info, header); + if (unroll_cnt <= 1) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + uint32_t uf2 = (trip_count - max_peel) / vector_length; + unroll_factor = TruncToPowerOfTwo(std::min(uf2, unroll_cnt)); + DCHECK_GE(unroll_factor, 1u); + } + + return unroll_factor; + } +}; + +uint32_t X86_64LoopHelper::GetUnrollingFactor(HLoopInformation* loop_info, + HBasicBlock* header) const { + uint32_t num_inst = 0, num_inst_header = 0, num_inst_loop_body = 0; + for (HBlocksInLoopIterator it(*loop_info); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + DCHECK(block); + num_inst = 0; + + for (HInstructionIterator it1(block->GetInstructions()); !it1.Done(); it1.Advance()) { + HInstruction* inst = it1.Current(); + DCHECK(inst); + + // SuspendCheck inside loop is handled with Goto. + // Ignoring SuspendCheck & Goto as partially unrolled loop body will have only one Goto. + // Instruction count for Goto is being handled during unroll factor calculation below. + if (inst->IsSuspendCheck() || inst->IsGoto()) { + continue; + } + + num_inst += GetMachineInstructionCount(inst); + } + + if (block == header) { + num_inst_header = num_inst; + } else { + num_inst_loop_body += num_inst; + } + } + + // Calculate actual unroll factor. + uint32_t unrolling_factor = kX86_64MaxUnrollFactor; + uint32_t unrolling_inst = kX86_64UnrolledMaxBodySizeInstr; + // "-3" for one Goto instruction. 
+ uint32_t desired_size = unrolling_inst - num_inst_header - 3; + if (desired_size < (2 * num_inst_loop_body)) { + return 1; + } + + while (unrolling_factor > 0) { + if ((desired_size >> unrolling_factor) >= num_inst_loop_body) { + break; + } + unrolling_factor--; + } + + return (1 << unrolling_factor); +} + ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa, ArenaAllocator* allocator) { switch (isa) { case InstructionSet::kArm64: { return new (allocator) Arm64LoopHelper; } + case InstructionSet::kX86_64: { + return new (allocator) X86_64LoopHelper; + } default: { return new (allocator) ArchDefaultLoopHelper; } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 6c76ab858b..5784707d0e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -19,8 +19,6 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" #include "arch/instruction_set.h" -#include "arch/mips/instruction_set_features_mips.h" -#include "arch/mips64/instruction_set_features_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" #include "driver/compiler_options.h" @@ -351,7 +349,7 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || + if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate() || reduction->IsVecDotProd()) { @@ -763,6 +761,11 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } // Vectorize loop, if possible and valid. if (kEnableVectorization && + // Disable vectorization for debuggable graphs: this is a workaround for the bug + // in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid. + // TODO: b/138601207, investigate other possible cases with wrong environment values and + // possibly switch back vectorization on for debuggable graphs. + !graph_->IsDebuggable() && TrySetSimpleLoopHeader(header, &main_phi) && ShouldVectorize(node, body, trip_count) && TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { @@ -1278,6 +1281,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, // (3) unit stride index, // (4) vectorizable right-hand-side value. uint64_t restrictions = kNone; + // Don't accept expressions that can throw. + if (instruction->CanThrow()) { + return false; + } if (instruction->IsArraySet()) { DataType::Type type = instruction->AsArraySet()->GetComponentType(); HInstruction* base = instruction->InputAt(0); @@ -1329,7 +1336,8 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, } // Otherwise accept only expressions with no effects outside the immediate loop-body. // Note that actual uses are inspected during right-hand-side tree traversal. 
- return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); + return !IsUsedOutsideLoop(node->loop_info, instruction) + && !instruction->DoesAnyWrite(); } bool HLoopOptimization::VectorizeUse(LoopNode* node, @@ -1613,77 +1621,25 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: - case DataType::Type::kInt16: *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | - kNoSAD| + kNoSAD | kNoDotProd; return TrySetVectorLength(8); - case DataType::Type::kInt32: - *restrictions |= kNoDiv | kNoSAD; - return TrySetVectorLength(4); - case DataType::Type::kInt64: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD; - return TrySetVectorLength(2); - case DataType::Type::kFloat32: - *restrictions |= kNoReduction; - return TrySetVectorLength(4); - case DataType::Type::kFloat64: - *restrictions |= kNoReduction; - return TrySetVectorLength(2); - default: - break; - } // switch type - } - return false; - case InstructionSet::kMips: - if (features->AsMipsInstructionSetFeatures()->HasMsa()) { - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoDotProd; - return TrySetVectorLength(16); - case DataType::Type::kUint16: - case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; - return TrySetVectorLength(8); - case DataType::Type::kInt32: - *restrictions |= kNoDiv; - return TrySetVectorLength(4); - case DataType::Type::kInt64: - *restrictions |= kNoDiv; - return TrySetVectorLength(2); - case DataType::Type::kFloat32: - *restrictions |= kNoReduction; - return TrySetVectorLength(4); - case DataType::Type::kFloat64: - *restrictions |= kNoReduction; - return TrySetVectorLength(2); - default: - break; - } // switch type - } - return false; - case InstructionSet::kMips64: - if (features->AsMips64InstructionSetFeatures()->HasMsa()) { - switch (type) { - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoDotProd; - return TrySetVectorLength(16); - case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD; return TrySetVectorLength(8); case DataType::Type::kInt32: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); case DataType::Type::kInt64: - *restrictions |= kNoDiv; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD; return TrySetVectorLength(2); case DataType::Type::kFloat32: *restrictions |= kNoReduction; @@ -2156,7 +2112,7 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, bool generate_code, DataType::Type reduction_type, uint64_t restrictions) { - if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + if (!instruction->IsAdd() || reduction_type != DataType::Type::kInt32) { return false; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 1940d55a9d..810871c18f 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1594,7 +1594,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { case DataType::Type::kInt32: if (std::isnan(value)) return graph->GetIntConstant(0, GetDexPc()); - if (value >= kPrimIntMax) + if (value >= static_cast<float>(kPrimIntMax)) return 
graph->GetIntConstant(kPrimIntMax, GetDexPc()); if (value <= kPrimIntMin) return graph->GetIntConstant(kPrimIntMin, GetDexPc()); @@ -1602,7 +1602,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { case DataType::Type::kInt64: if (std::isnan(value)) return graph->GetLongConstant(0, GetDexPc()); - if (value >= kPrimLongMax) + if (value >= static_cast<float>(kPrimLongMax)) return graph->GetLongConstant(kPrimLongMax, GetDexPc()); if (value <= kPrimLongMin) return graph->GetLongConstant(kPrimLongMin, GetDexPc()); @@ -1626,7 +1626,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { case DataType::Type::kInt64: if (std::isnan(value)) return graph->GetLongConstant(0, GetDexPc()); - if (value >= kPrimLongMax) + if (value >= static_cast<double>(kPrimLongMax)) return graph->GetLongConstant(kPrimLongMax, GetDexPc()); if (value <= kPrimLongMin) return graph->GetLongConstant(kPrimLongMin, GetDexPc()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fedad0c69a..7ed5bca947 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -25,6 +25,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/array_ref.h" +#include "base/intrusive_forward_list.h" #include "base/iteration_range.h" #include "base/mutex.h" #include "base/quasi_atomic.h" @@ -45,7 +46,6 @@ #include "mirror/class.h" #include "mirror/method_type.h" #include "offsets.h" -#include "utils/intrusive_forward_list.h" namespace art { @@ -131,6 +131,7 @@ enum GraphAnalysisResult { kAnalysisFailThrowCatchLoop, kAnalysisFailAmbiguousArrayOp, kAnalysisFailIrreducibleLoopAndStringInit, + kAnalysisFailPhiEquivalentInOsr, kAnalysisSuccess, }; @@ -320,6 +321,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool dead_reference_safe = false, bool debuggable = false, bool osr = false, + bool is_shared_jit_code = false, + bool baseline = false, int start_instruction_id = 0) : allocator_(allocator), arena_stack_(arena_stack), @@ -334,6 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { temporaries_vreg_slots_(0), has_bounds_checks_(false), has_try_catch_(false), + has_monitor_operations_(false), has_simd_(false), has_loops_(false), has_irreducible_loops_(false), @@ -355,7 +359,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { art_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), osr_(osr), - cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)) { + baseline_(baseline), + cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)), + is_shared_jit_code_(is_shared_jit_code) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -503,7 +509,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return reverse_post_order_; } - ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() { + ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() const { DCHECK(GetReversePostOrder()[0] == entry_block_); return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1); } @@ -585,6 +591,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool IsCompilingOsr() const { return osr_; } + bool IsCompilingBaseline() const { return baseline_; } + + bool IsCompilingForSharedJitCode() const { + return is_shared_jit_code_; + } + ArenaSet<ArtMethod*>& GetCHASingleImplementationList() { return cha_single_implementation_list_; } @@ -600,6 +612,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool 
value) { has_try_catch_ = value; } + bool HasMonitorOperations() const { return has_monitor_operations_; } + void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; } + bool HasSIMD() const { return has_simd_; } void SetHasSIMD(bool value) { has_simd_ = value; } @@ -696,6 +711,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // false positives. bool has_try_catch_; + // Flag whether there are any HMonitorOperation in the graph. If yes this will mandate + // DexRegisterMap to be present to allow deadlock analysis for non-debuggable code. + bool has_monitor_operations_; + // Flag whether SIMD instructions appear in the graph. If true, the // code generators may have to be more careful spilling the wider // contents of SIMD registers. @@ -771,9 +790,17 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // compiled code entries which the interpreter can directly jump to. const bool osr_; + // Whether we are compiling baseline (not running optimizations). This affects + // the code being generated. + const bool baseline_; + // List of methods that are assumed to have single implementation. ArenaSet<ArtMethod*> cha_single_implementation_list_; + // Whether we are JIT compiling in the shared region area, putting + // restrictions on, for example, how literals are being generated. + bool is_shared_jit_code_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. friend class HInliner; // For the reverse post order. @@ -1099,7 +1126,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { } // Insert `this` between `predecessor` and `successor. This method - // preserves the indicies, and will update the first edge found between + // preserves the indices, and will update the first edge found between // `predecessor` and `successor`. 
void InsertBetween(HBasicBlock* predecessor, HBasicBlock* successor) { size_t predecessor_index = successor->GetPredecessorIndexOf(predecessor); @@ -1438,6 +1465,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Shr, BinaryOperation) \ M(StaticFieldGet, Instruction) \ M(StaticFieldSet, Instruction) \ + M(StringBuilderAppend, Instruction) \ M(UnresolvedInstanceFieldGet, Instruction) \ M(UnresolvedInstanceFieldSet, Instruction) \ M(UnresolvedStaticFieldGet, Instruction) \ @@ -1497,17 +1525,6 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) -#ifndef ART_ENABLE_CODEGEN_mips -#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) -#else -#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ - M(MipsComputeBaseMethodAddress, Instruction) \ - M(MipsPackedSwitch, Instruction) \ - M(IntermediateArrayAddressIndex, Instruction) -#endif - -#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) - #ifndef ART_ENABLE_CODEGEN_x86 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) #else @@ -1520,7 +1537,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) \ - M(X86AndNot, Instruction) \ + M(X86AndNot, Instruction) \ M(X86MaskOrResetLeastSetBit, Instruction) #else #define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) @@ -1533,8 +1550,6 @@ class HLoopInformationOutwardIterator : public ValueObject { FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ - FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) @@ -2137,12 +2152,13 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // If this instruction will do an implicit null check, return the `HNullCheck` associated // with it. Otherwise return null. HNullCheck* GetImplicitNullCheck() const { - // Find the first previous instruction which is not a move. - HInstruction* first_prev_not_move = GetPreviousDisregardingMoves(); - if (first_prev_not_move != nullptr && - first_prev_not_move->IsNullCheck() && - first_prev_not_move->IsEmittedAtUseSite()) { - return first_prev_not_move->AsNullCheck(); + // Go over previous non-move instructions that are emitted at use site. 
+ HInstruction* prev_not_move = GetPreviousDisregardingMoves(); + while (prev_not_move != nullptr && prev_not_move->IsEmittedAtUseSite()) { + if (prev_not_move->IsNullCheck()) { + return prev_not_move->AsNullCheck(); + } + prev_not_move = prev_not_move->GetPreviousDisregardingMoves(); } return nullptr; } @@ -4775,7 +4791,16 @@ class HInvokeVirtual final : public HInvoke { case Intrinsics::kThreadCurrentThread: case Intrinsics::kStringBufferAppend: case Intrinsics::kStringBufferToString: - case Intrinsics::kStringBuilderAppend: + case Intrinsics::kStringBuilderAppendObject: + case Intrinsics::kStringBuilderAppendString: + case Intrinsics::kStringBuilderAppendCharSequence: + case Intrinsics::kStringBuilderAppendCharArray: + case Intrinsics::kStringBuilderAppendBoolean: + case Intrinsics::kStringBuilderAppendChar: + case Intrinsics::kStringBuilderAppendInt: + case Intrinsics::kStringBuilderAppendLong: + case Intrinsics::kStringBuilderAppendFloat: + case Intrinsics::kStringBuilderAppendDouble: case Intrinsics::kStringBuilderToString: return false; default: @@ -6880,6 +6905,57 @@ class HStaticFieldSet final : public HExpression<2> { const FieldInfo field_info_; }; +class HStringBuilderAppend final : public HVariableInputSizeInstruction { + public: + HStringBuilderAppend(HIntConstant* format, + uint32_t number_of_arguments, + ArenaAllocator* allocator, + uint32_t dex_pc) + : HVariableInputSizeInstruction( + kStringBuilderAppend, + DataType::Type::kReference, + // The runtime call may read memory from inputs. It never writes outside + // of the newly allocated result object (or newly allocated helper objects). + SideEffects::AllReads().Union(SideEffects::CanTriggerGC()), + dex_pc, + allocator, + number_of_arguments + /* format */ 1u, + kArenaAllocInvokeInputs) { + DCHECK_GE(number_of_arguments, 1u); // There must be something to append. + SetRawInputAt(FormatIndex(), format); + } + + void SetArgumentAt(size_t index, HInstruction* argument) { + DCHECK_LE(index, GetNumberOfArguments()); + SetRawInputAt(index, argument); + } + + // Return the number of arguments, excluding the format. + size_t GetNumberOfArguments() const { + DCHECK_GE(InputCount(), 1u); + return InputCount() - 1u; + } + + size_t FormatIndex() const { + return GetNumberOfArguments(); + } + + HIntConstant* GetFormat() { + return InputAt(FormatIndex())->AsIntConstant(); + } + + bool NeedsEnvironment() const override { return true; } + + bool CanThrow() const override { return true; } + + bool CanBeNull() const override { return false; } + + DECLARE_INSTRUCTION(StringBuilderAppend); + + protected: + DEFAULT_COPY_CONSTRUCTOR(StringBuilderAppend); +}; + class HUnresolvedInstanceFieldGet final : public HExpression<1> { public: HUnresolvedInstanceFieldGet(HInstruction* obj, @@ -7222,7 +7298,7 @@ class HInstanceOf final : public HTypeCheckInstruction { } static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. + // TODO: Re-evaluate now that mips codegen has been removed. 
return check_kind != TypeCheckKind::kExactCheck; } @@ -7789,9 +7865,6 @@ class HIntermediateAddress final : public HExpression<2> { #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) #include "nodes_shared.h" #endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "nodes_mips.h" -#endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "nodes_x86.h" #endif diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h deleted file mode 100644 index 4993f5737e..0000000000 --- a/compiler/optimizing/nodes_mips.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ - -namespace art { - -// Compute the address of the method for MIPS Constant area support. -class HMipsComputeBaseMethodAddress : public HExpression<0> { - public: - // Treat the value as an int32_t, but it is really a 32 bit native pointer. - HMipsComputeBaseMethodAddress() - : HExpression(kMipsComputeBaseMethodAddress, - DataType::Type::kInt32, - SideEffects::None(), - kNoDexPc) { - } - - bool CanBeMoved() const override { return true; } - - DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); - - protected: - DEFAULT_COPY_CONSTRUCTOR(MipsComputeBaseMethodAddress); -}; - -// Mips version of HPackedSwitch that holds a pointer to the base method address. -class HMipsPackedSwitch final : public HExpression<2> { - public: - HMipsPackedSwitch(int32_t start_value, - int32_t num_entries, - HInstruction* input, - HMipsComputeBaseMethodAddress* method_base, - uint32_t dex_pc) - : HExpression(kMipsPackedSwitch, SideEffects::None(), dex_pc), - start_value_(start_value), - num_entries_(num_entries) { - SetRawInputAt(0, input); - SetRawInputAt(1, method_base); - } - - bool IsControlFlow() const override { return true; } - - int32_t GetStartValue() const { return start_value_; } - - int32_t GetNumEntries() const { return num_entries_; } - - HBasicBlock* GetDefaultBlock() const { - // Last entry is the default block. - return GetBlock()->GetSuccessors()[num_entries_]; - } - - DECLARE_INSTRUCTION(MipsPackedSwitch); - - protected: - DEFAULT_COPY_CONSTRUCTOR(MipsPackedSwitch); - - private: - const int32_t start_value_; - const int32_t num_entries_; -}; - -// This instruction computes part of the array access offset (index offset). -// -// For array accesses the element address has the following structure: -// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. The address part -// (index << ELEM_SHIFT) can be shared across array accesses with -// the same data type and index. For example, in the following loop 5 accesses can share address -// computation: -// -// void foo(int[] a, int[] b, int[] c) { -// for (i...) { -// a[i] = a[i] + 5; -// b[i] = b[i] + c[i]; -// } -// } -// -// Note: as the instruction doesn't involve base array address into computations it has no side -// effects. 
-class HIntermediateArrayAddressIndex final : public HExpression<2> { - public: - HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) - : HExpression(kIntermediateArrayAddressIndex, - DataType::Type::kInt32, - SideEffects::None(), - dex_pc) { - SetRawInputAt(0, index); - SetRawInputAt(1, shift); - } - - bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { - return true; - } - bool IsActualObject() const override { return false; } - - HInstruction* GetIndex() const { return InputAt(0); } - HInstruction* GetShift() const { return InputAt(1); } - - DECLARE_INSTRUCTION(IntermediateArrayAddressIndex); - - protected: - DEFAULT_COPY_CONSTRUCTOR(IntermediateArrayAddressIndex); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index efe4d6b000..e8170482e9 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1155,6 +1155,8 @@ class HVecStore final : public HVecMemoryOperation { // A store needs to stay in place. bool CanBeMoved() const override { return false; } + HInstruction* GetValue() const { return InputAt(2); } + DECLARE_INSTRUCTION(VecStore); protected: diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 8864a12301..70246604a9 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -22,10 +22,6 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "instruction_simplifier_mips.h" -#include "pc_relative_fixups_mips.h" -#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #include "instruction_simplifier_x86.h" @@ -108,12 +104,6 @@ const char* OptimizationPassName(OptimizationPass pass) { case OptimizationPass::kInstructionSimplifierArm64: return arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName; #endif -#ifdef ART_ENABLE_CODEGEN_mips - case OptimizationPass::kPcRelativeFixupsMips: - return mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName; - case OptimizationPass::kInstructionSimplifierMips: - return mips::InstructionSimplifierMips::kInstructionSimplifierMipsPassName; -#endif #ifdef ART_ENABLE_CODEGEN_x86 case OptimizationPass::kPcRelativeFixupsX86: return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName; @@ -160,10 +150,6 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) { #ifdef ART_ENABLE_CODEGEN_arm64 X(OptimizationPass::kInstructionSimplifierArm64); #endif -#ifdef ART_ENABLE_CODEGEN_mips - X(OptimizationPass::kPcRelativeFixupsMips); - X(OptimizationPass::kInstructionSimplifierMips); -#endif #ifdef ART_ENABLE_CODEGEN_x86 X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); @@ -300,16 +286,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = new (allocator) arm64::InstructionSimplifierArm64(graph, stats); break; #endif -#ifdef ART_ENABLE_CODEGEN_mips - case OptimizationPass::kPcRelativeFixupsMips: - DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) mips::PcRelativeFixups(graph, codegen, stats); - break; - case OptimizationPass::kInstructionSimplifierMips: - DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) 
mips::InstructionSimplifierMips(graph, codegen, stats); - break; -#endif #ifdef ART_ENABLE_CODEGEN_x86 case OptimizationPass::kPcRelativeFixupsX86: DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index b84e03894c..f4777ad754 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -89,10 +89,6 @@ enum class OptimizationPass { #ifdef ART_ENABLE_CODEGEN_arm64 kInstructionSimplifierArm64, #endif -#ifdef ART_ENABLE_CODEGEN_mips - kPcRelativeFixupsMips, - kInstructionSimplifierMips, -#endif #ifdef ART_ENABLE_CODEGEN_x86 kPcRelativeFixupsX86, kInstructionSimplifierX86, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index a52031cced..bad540e03c 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -27,8 +27,6 @@ #include "read_barrier_config.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/assembler.h" -#include "utils/mips/assembler_mips.h" -#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -182,14 +180,6 @@ TEST_ISA(kX86) TEST_ISA(kX86_64) #endif -#ifdef ART_ENABLE_CODEGEN_mips -TEST_ISA(kMips) -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -TEST_ISA(kMips64) -#endif - #ifdef ART_ENABLE_CODEGEN_arm TEST_F(OptimizingCFITest, kThumb2Adjust) { using vixl32::r0; @@ -215,66 +205,6 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { } #endif -#ifdef ART_ENABLE_CODEGEN_mips -TEST_F(OptimizingCFITest, kMipsAdjust) { - // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. - static constexpr size_t kNumNops = 1u + (1u << 15); - std::vector<uint8_t> expected_asm( - expected_asm_kMips_adjust_head, - expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head)); - expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); - expected_asm.insert( - expected_asm.end(), - expected_asm_kMips_adjust_tail, - expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail)); - std::vector<uint8_t> expected_cfi( - expected_cfi_kMips_adjust, - expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust)); - SetUpFrame(InstructionSet::kMips); -#define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())-> - mips::MipsLabel target; - __ Beqz(mips::A0, &target); - // Push the target out of range of BEQZ. - for (size_t i = 0; i != kNumNops; ++i) { - __ Nop(); - } - __ Bind(&target); -#undef __ - Finish(); - Check(InstructionSet::kMips, "kMips_adjust", expected_asm, expected_cfi); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -TEST_F(OptimizingCFITest, kMips64Adjust) { - // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. 
- static constexpr size_t kNumNops = 1u + (1u << 15); - std::vector<uint8_t> expected_asm( - expected_asm_kMips64_adjust_head, - expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); - expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); - expected_asm.insert( - expected_asm.end(), - expected_asm_kMips64_adjust_tail, - expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); - std::vector<uint8_t> expected_cfi( - expected_cfi_kMips64_adjust, - expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); - SetUpFrame(InstructionSet::kMips64); -#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> - mips64::Mips64Label target; - __ Beqc(mips64::A1, mips64::A2, &target); - // Push the target out of range of BEQC. - for (size_t i = 0; i != kNumNops; ++i) { - __ Nop(); - } - __ Bind(&target); -#undef __ - Finish(); - Check(InstructionSet::kMips64, "kMips64_adjust", expected_asm, expected_cfi); -} -#endif - #endif // ART_TARGET_ANDROID } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 1e82c4b0f7..4c99700d16 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -136,84 +136,6 @@ static constexpr uint8_t expected_cfi_kX86_64[] = { // 0x00000029: .cfi_restore_state // 0x00000029: .cfi_def_cfa_offset: 64 -static constexpr uint8_t expected_asm_kMips[] = { - 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, - 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, - 0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, - 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27, -}; -static constexpr uint8_t expected_cfi_kMips[] = { - 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, - 0x0E, 0x40, -}; -// 0x00000000: addiu sp, sp, -64 -// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw ra, +60(sp) -// 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw s1, +56(sp) -// 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw s0, +52(sp) -// 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(sp) -// 0x00000014: sdc1 f20, +32(sp) -// 0x00000018: .cfi_remember_state -// 0x00000018: lw ra, +60(sp) -// 0x0000001c: .cfi_restore: r31 -// 0x0000001c: lw s1, +56(sp) -// 0x00000020: .cfi_restore: r17 -// 0x00000020: lw s0, +52(sp) -// 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f22, +40(sp) -// 0x00000028: ldc1 f20, +32(sp) -// 0x0000002c: jr ra -// 0x00000030: addiu sp, sp, 64 -// 0x00000034: .cfi_def_cfa_offset: 0 -// 0x00000034: .cfi_restore_state -// 0x00000034: .cfi_def_cfa_offset: 64 - -static constexpr uint8_t expected_asm_kMips64[] = { - 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, - 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, - 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, - 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, - 0x00, 0x00, 0x1F, 0xD8, -}; -static constexpr uint8_t expected_cfi_kMips64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, - 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 
0x0E, 0x40, -}; -// 0x00000000: daddiu sp, sp, -64 -// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd ra, +56(sp) -// 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd s1, +48(sp) -// 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd s0, +40(sp) -// 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(sp) -// 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(sp) -// 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: .cfi_remember_state -// 0x00000018: ld ra, +56(sp) -// 0x0000001c: .cfi_restore: r31 -// 0x0000001c: ld s1, +48(sp) -// 0x00000020: .cfi_restore: r17 -// 0x00000020: ld s0, +40(sp) -// 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f25, +32(sp) -// 0x00000028: .cfi_restore: r57 -// 0x00000028: ldc1 f24, +24(sp) -// 0x0000002c: .cfi_restore: r56 -// 0x0000002c: daddiu sp, sp, 64 -// 0x00000030: .cfi_def_cfa_offset: 0 -// 0x00000030: jic ra, 0 -// 0x00000034: .cfi_restore_state -// 0x00000034: .cfi_def_cfa_offset: 64 - static constexpr uint8_t expected_asm_kThumb2_adjust[] = { // VIXL emits an extra 2 bytes here for a 32-bit beq as there is no // optimistic 16-bit emit and subsequent fixup for out of reach targets @@ -326,112 +248,3 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { // 0x00000094: pop {r5, r6, pc} // 0x00000096: .cfi_restore_state // 0x00000096: .cfi_def_cfa_offset: 64 - -static constexpr uint8_t expected_asm_kMips_adjust_head[] = { - 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, - 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x08, 0x00, 0x80, 0x14, 0xF0, 0xFF, 0xBD, 0x27, - 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, - 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, - 0x09, 0x00, 0x20, 0x00, 0x10, 0x00, 0xBD, 0x27, -}; -static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { - 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, - 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x09, 0x00, 0xE0, 0x03, - 0x40, 0x00, 0xBD, 0x27, -}; -static constexpr uint8_t expected_cfi_kMips_adjust[] = { - 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x50, 0x0E, 0x50, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, - 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, -}; -// 0x00000000: addiu sp, sp, -64 -// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw ra, +60(sp) -// 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw s1, +56(sp) -// 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw s0, +52(sp) -// 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(sp) -// 0x00000014: sdc1 f20, +32(sp) -// 0x00000018: bnez a0, 0x0000003c ; +36 -// 0x0000001c: addiu sp, sp, -16 -// 0x00000020: .cfi_def_cfa_offset: 80 -// 0x00000020: sw ra, +0(sp) -// 0x00000024: nal -// 0x00000028: lui at, 2 -// 0x0000002c: ori at, at, 24 -// 0x00000030: addu at, at, ra -// 0x00000034: lw ra, +0(sp) -// 0x00000038: jr at -// 0x0000003c: addiu sp, sp, 16 -// 0x00000040: .cfi_def_cfa_offset: 64 -// 0x00000040: nop -// ... 
-// 0x00020040: nop -// 0x00020044: .cfi_remember_state -// 0x00020044: lw ra, +60(sp) -// 0x00020048: .cfi_restore: r31 -// 0x00020048: lw s1, +56(sp) -// 0x0002004c: .cfi_restore: r17 -// 0x0002004c: lw s0, +52(sp) -// 0x00020050: .cfi_restore: r16 -// 0x00020050: ldc1 f22, +40(sp) -// 0x00020054: ldc1 f20, +32(sp) -// 0x00020058: jr ra -// 0x0002005c: addiu sp, sp, 64 -// 0x00020060: .cfi_def_cfa_offset: 0 -// 0x00020060: .cfi_restore_state -// 0x00020060: .cfi_def_cfa_offset: 64 - -static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { - 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF, - 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7, - 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, -}; -static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { - 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF, - 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67, - 0x00, 0x00, 0x1F, 0xD8, -}; -static constexpr uint8_t expected_cfi_kMips64_adjust[] = { - 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, - 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A, - 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, - 0x00, 0x44, 0x0B, 0x0E, 0x40, -}; -// 0x00000000: daddiu sp, sp, -64 -// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd ra, +56(sp) -// 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd s1, +48(sp) -// 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd s0, +40(sp) -// 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(sp) -// 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(sp) -// 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: bnec a1, a2, 0x00000024 ; +12 -// 0x0000001c: auipc at, 2 -// 0x00000020: jic at, 12 ; bc 0x00020028 ; +131080 -// 0x00000024: nop -// ... -// 0x00020024: nop -// 0x00020028: .cfi_remember_state -// 0x00020028: ld ra, +56(sp) -// 0x0002002c: .cfi_restore: r31 -// 0x0002002c: ld s1, +48(sp) -// 0x00020030: .cfi_restore: r17 -// 0x00020030: ld s0, +40(sp) -// 0x00020034: .cfi_restore: r16 -// 0x00020034: ldc1 f25, +32(sp) -// 0x00020038: .cfi_restore: r57 -// 0x00020038: ldc1 f24, +24(sp) -// 0x0002003c: .cfi_restore: r56 -// 0x0002003c: daddiu sp, sp, 64 -// 0x00020040: .cfi_def_cfa_offset: 0 -// 0x00020040: jic ra, 0 -// 0x00020044: .cfi_restore_state -// 0x00020044: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f4bf11d3d3..9978a6fa18 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -137,13 +137,15 @@ class PassObserver : public ValueObject { LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } + if (visualizer_enabled_) { + FlushVisualizer(); + } DCHECK(visualizer_oss_.str().empty()); } - void DumpDisassembly() REQUIRES(!visualizer_dump_mutex_) { + void DumpDisassembly() { if (visualizer_enabled_) { visualizer_.DumpGraphWithDisassembly(); - FlushVisualizer(); } } @@ -158,12 +160,11 @@ class PassObserver : public ValueObject { } private: - void StartPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) { + void StartPass(const char* pass_name) { VLOG(compiler) << "Starting pass: " << pass_name; // Dump graph first, then start timer. 
if (visualizer_enabled_) { visualizer_.DumpGraph(pass_name, /* is_after_pass= */ false, graph_in_bad_state_); - FlushVisualizer(); } if (timing_logger_enabled_) { timing_logger_.StartTiming(pass_name); @@ -178,14 +179,13 @@ class PassObserver : public ValueObject { visualizer_oss_.clear(); } - void EndPass(const char* pass_name, bool pass_change) REQUIRES(!visualizer_dump_mutex_) { + void EndPass(const char* pass_name, bool pass_change) { // Pause timer first, then dump graph. if (timing_logger_enabled_) { timing_logger_.EndTiming(); } if (visualizer_enabled_) { visualizer_.DumpGraph(pass_name, /* is_after_pass= */ true, graph_in_bad_state_); - FlushVisualizer(); } // Validate the HGraph if running in debug mode. @@ -295,6 +295,7 @@ class OptimizingCompiler final : public Compiler { bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, + jit::JitMemoryRegion* region, ArtMethod* method, bool baseline, bool osr, @@ -383,6 +384,7 @@ class OptimizingCompiler final : public Compiler { ArtMethod* method, bool baseline, bool osr, + bool is_shared_jit_code, VariableSizedHandleScope* handles) const; CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator, @@ -404,9 +406,7 @@ class OptimizingCompiler final : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void GenerateJitDebugInfo(ArtMethod* method, - const debug::MethodDebugInfo& method_debug_info) - REQUIRES_SHARED(Locks::mutator_lock_); + void GenerateJitDebugInfo(const debug::MethodDebugInfo& method_debug_info); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -450,8 +450,6 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { return instruction_set == InstructionSet::kArm || instruction_set == InstructionSet::kArm64 || instruction_set == InstructionSet::kThumb2 - || instruction_set == InstructionSet::kMips - || instruction_set == InstructionSet::kMips64 || instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; } @@ -462,19 +460,6 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { switch (codegen->GetCompilerOptions().GetInstructionSet()) { -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: { - OptimizationDef mips_optimizations[] = { - OptDef(OptimizationPass::kPcRelativeFixupsMips) - }; - return RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips_optimizations); - } -#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { @@ -537,36 +522,6 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, arm64_optimizations); } #endif -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: { - OptimizationDef mips_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierMips), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kPcRelativeFixupsMips) - }; - return RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips_optimizations); - } -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case InstructionSet::kMips64: { - OptimizationDef mips64_optimizations[] = { - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch") - }; - return RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - 
mips64_optimizations); - } -#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { @@ -782,6 +737,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, ArtMethod* method, bool baseline, bool osr, + bool is_shared_jit_code, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation); const CompilerOptions& compiler_options = GetCompilerOptions(); @@ -849,7 +805,9 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, kInvalidInvokeType, dead_reference_safe, compiler_options.GetDebuggable(), - /* osr= */ osr); + /* osr= */ osr, + /* is_shared_jit_code= */ is_shared_jit_code, + /* baseline= */ baseline); if (method != nullptr) { graph->SetArtMethod(method); @@ -910,6 +868,11 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, MethodCompilationStat::kNotCompiledIrreducibleLoopAndStringInit); break; } + case kAnalysisFailPhiEquivalentInOsr: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledPhiEquivalentInOsr); + break; + } case kAnalysisSuccess: UNREACHABLE(); } @@ -1106,6 +1069,7 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, method, compiler_options.IsBaseline(), /* osr= */ false, + /* is_shared_jit_code= */ false, &handles)); } } @@ -1166,7 +1130,8 @@ static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* alloca jni_compiled_method.GetFrameSize(), jni_compiled_method.GetCoreSpillMask(), jni_compiled_method.GetFpSpillMask(), - /* num_dex_registers= */ 0); + /* num_dex_registers= */ 0, + /* baseline= */ false); stack_map_stream->EndMethod(); return stack_map_stream->Encode(); } @@ -1248,6 +1213,7 @@ bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, + jit::JitMemoryRegion* region, ArtMethod* method, bool baseline, bool osr, @@ -1279,39 +1245,24 @@ bool OptimizingCompiler::JitCompile(Thread* self, ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator, jni_compiled_method); - uint8_t* stack_map_data = nullptr; - uint8_t* roots_data = nullptr; - uint32_t data_size = code_cache->ReserveData(self, - stack_map.size(), - /* number_of_roots= */ 0, - method, - &stack_map_data, - &roots_data); - if (stack_map_data == nullptr || roots_data == nullptr) { + + ArrayRef<const uint8_t> reserved_code; + ArrayRef<const uint8_t> reserved_data; + if (!code_cache->Reserve(self, + region, + jni_compiled_method.GetCode().size(), + stack_map.size(), + /* number_of_roots= */ 0, + method, + /*out*/ &reserved_code, + /*out*/ &reserved_data)) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); return false; } - memcpy(stack_map_data, stack_map.data(), stack_map.size()); - - const void* code = code_cache->CommitCode( - self, - method, - stack_map_data, - roots_data, - jni_compiled_method.GetCode().data(), - jni_compiled_method.GetCode().size(), - data_size, - osr, - roots, - /* has_should_deoptimize_flag= */ false, - cha_single_implementation_list); - if (code == nullptr) { - return false; - } + const uint8_t* code = reserved_code.data() + OatQuickMethodHeader::InstructionAlignedSize(); + // Add debug info after we know the code location but before we update entry-point. 
if (compiler_options.GenerateAnyDebugInfo()) { - const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); - const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; info.custom_name = "art_jni_trampoline"; info.dex_file = dex_file; @@ -1324,12 +1275,27 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_native_debuggable = compiler_options.GetNativeDebuggable(); info.is_optimized = true; info.is_code_address_text_relative = false; - info.code_address = code_address; + info.code_address = reinterpret_cast<uintptr_t>(code); info.code_size = jni_compiled_method.GetCode().size(); - info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); + info.frame_size_in_bytes = jni_compiled_method.GetFrameSize(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - GenerateJitDebugInfo(method, info); + GenerateJitDebugInfo(info); + } + + if (!code_cache->Commit(self, + region, + method, + reserved_code, + jni_compiled_method.GetCode(), + reserved_data, + roots, + ArrayRef<const uint8_t>(stack_map), + osr, + /* has_should_deoptimize_flag= */ false, + cha_single_implementation_list)) { + code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); + return false; } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1366,8 +1332,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, &code_allocator, dex_compilation_unit, method, - baseline, + baseline || GetCompilerOptions().IsBaseline(), osr, + /* is_shared_jit_code= */ code_cache->IsSharedRegion(*region), &handles)); if (codegen.get() == nullptr) { return false; @@ -1375,20 +1342,23 @@ bool OptimizingCompiler::JitCompile(Thread* self, } ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item); - size_t number_of_roots = codegen->GetNumberOfJitRoots(); - uint8_t* stack_map_data = nullptr; - uint8_t* roots_data = nullptr; - uint32_t data_size = code_cache->ReserveData(self, - stack_map.size(), - number_of_roots, - method, - &stack_map_data, - &roots_data); - if (stack_map_data == nullptr || roots_data == nullptr) { + + ArrayRef<const uint8_t> reserved_code; + ArrayRef<const uint8_t> reserved_data; + if (!code_cache->Reserve(self, + region, + code_allocator.GetMemory().size(), + stack_map.size(), + /*number_of_roots=*/codegen->GetNumberOfJitRoots(), + method, + /*out*/ &reserved_code, + /*out*/ &reserved_data)) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); return false; } - memcpy(stack_map_data, stack_map.data(), stack_map.size()); + const uint8_t* code = reserved_code.data() + OatQuickMethodHeader::InstructionAlignedSize(); + const uint8_t* roots_data = reserved_data.data(); + std::vector<Handle<mirror::Object>> roots; codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. 
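Editor's note on the JitCompile hunks above: the old single CommitCode call is replaced by a two-phase protocol, Reserve space in the code cache, emit JIT debug info once the final code address is known but before the code goes live, then Commit, and Free the reservation on any failure. The following is a minimal standalone sketch of that reserve/commit-or-free shape; ToyCodeCache, InstallCode and the failure condition are invented for illustration and are not ART's JitCodeCache API.

#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

// Toy stand-in for a code cache that hands out reservations which must later
// be either committed (published) or freed (rolled back).
class ToyCodeCache {
 public:
  // Phase 1: reserve contiguous space for code + metadata; may fail.
  std::optional<size_t> Reserve(size_t bytes) {
    if (used_ + bytes > storage_.size()) return std::nullopt;
    size_t offset = used_;
    used_ += bytes;
    return offset;
  }
  // Phase 2a: publish generated code into the reserved slot. In the real code
  // cache this step can still fail, in which case the caller must Free.
  bool Commit(size_t offset, const std::vector<uint8_t>& code) {
    if (code.empty()) return false;  // stand-in for a late failure
    std::memcpy(storage_.data() + offset, code.data(), code.size());
    return true;
  }
  // Phase 2b: roll back a reservation that will not be committed.
  void Free(size_t offset, size_t bytes) {
    if (offset + bytes == used_) used_ = offset;  // toy bump-pointer rollback
  }

 private:
  std::vector<uint8_t> storage_ = std::vector<uint8_t>(1 << 20);
  size_t used_ = 0;
};

// Mirrors the control flow of the new JIT path: reserve, use the known final
// address (e.g. for debug info) while the code is not yet live, then commit
// or roll back.
bool InstallCode(ToyCodeCache& cache, const std::vector<uint8_t>& code) {
  std::optional<size_t> offset = cache.Reserve(code.size());
  if (!offset) {
    return false;  // analogous to kJitOutOfMemoryForCommit
  }
  // ... debug info for the address at *offset would be generated here ...
  if (!cache.Commit(*offset, code)) {
    cache.Free(*offset, code.size());  // roll back, as the diff does on Commit failure
    return false;
  }
  return true;
}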
@@ -1398,29 +1368,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, return handles.Contains(root.GetReference()); })); - const void* code = code_cache->CommitCode( - self, - method, - stack_map_data, - roots_data, - code_allocator.GetMemory().data(), - code_allocator.GetMemory().size(), - data_size, - osr, - roots, - codegen->GetGraph()->HasShouldDeoptimizeFlag(), - codegen->GetGraph()->GetCHASingleImplementationList()); - - if (code == nullptr) { - MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); - code_cache->ClearData(self, stack_map_data, roots_data); - return false; - } - + // Add debug info after we know the code location but before we update entry-point. const CompilerOptions& compiler_options = GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { - const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); - const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; DCHECK(info.custom_name.empty()); info.dex_file = dex_file; @@ -1433,12 +1383,27 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_native_debuggable = compiler_options.GetNativeDebuggable(); info.is_optimized = true; info.is_code_address_text_relative = false; - info.code_address = code_address; + info.code_address = reinterpret_cast<uintptr_t>(code); info.code_size = code_allocator.GetMemory().size(); - info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); - info.code_info = stack_map.size() == 0 ? nullptr : stack_map_data; + info.frame_size_in_bytes = codegen->GetFrameSize(); + info.code_info = stack_map.size() == 0 ? nullptr : stack_map.data(); info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - GenerateJitDebugInfo(method, info); + GenerateJitDebugInfo(info); + } + + if (!code_cache->Commit(self, + region, + method, + reserved_code, + code_allocator.GetMemory(), + reserved_data, + roots, + ArrayRef<const uint8_t>(stack_map), + osr, + codegen->GetGraph()->HasShouldDeoptimizeFlag(), + codegen->GetGraph()->GetCHASingleImplementationList())) { + code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); + return false; } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1462,31 +1427,22 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } -void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method ATTRIBUTE_UNUSED, - const debug::MethodDebugInfo& info) { +void OptimizingCompiler::GenerateJitDebugInfo(const debug::MethodDebugInfo& info) { const CompilerOptions& compiler_options = GetCompilerOptions(); - DCHECK(compiler_options.GenerateAnyDebugInfo()); - TimingLogger logger("Generate JIT debug info logger", true, VLOG_IS_ON(jit)); - { - TimingLogger::ScopedTiming st("Generate JIT debug info", &logger); - + if (compiler_options.GenerateAnyDebugInfo()) { // If both flags are passed, generate full debug info. const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); // Create entry for the single method that we just compiled. 
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - compiler_options.GetInstructionSet(), - compiler_options.GetInstructionSetFeatures(), - mini_debug_info, - info); - AddNativeDebugInfoForJit(Thread::Current(), - reinterpret_cast<const void*>(info.code_address), - elf_file, - debug::PackElfFileForJIT, - compiler_options.GetInstructionSet(), - compiler_options.GetInstructionSetFeatures()); + InstructionSet isa = compiler_options.GetInstructionSet(); + const InstructionSetFeatures* features = compiler_options.GetInstructionSetFeatures(); + std::vector<uint8_t> elf = debug::MakeElfFileForJIT(isa, features, mini_debug_info, info); + + // NB: Don't allow packing of full info since it would remove non-backtrace data. + MutexLock mu(Thread::Current(), *Locks::jit_lock_); + const void* code_ptr = reinterpret_cast<const void*>(info.code_address); + AddNativeDebugInfoForJit(code_ptr, elf, /*allow_packing=*/ mini_debug_info); } - Runtime::Current()->GetJit()->AddTimingLogger(logger); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index ddd57f5f1a..83dbef7409 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -61,6 +61,7 @@ enum class MethodCompilationStat { kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, kNotCompiledIrreducibleLoopAndStringInit, + kNotCompiledPhiEquivalentInOsr, kInlinedMonomorphicCall, kInlinedPolymorphicCall, kMonomorphicCall, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index e5f694109a..eb262bc123 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -180,7 +180,29 @@ class OptimizingUnitTestHelper { } } + // Run GraphChecker with all checks. + // + // Return: the status whether the run is successful. + bool CheckGraph(HGraph* graph) { + return CheckGraph(graph, /*check_ref_type_info=*/true); + } + + // Run GraphChecker with all checks except reference type information checks. + // + // Return: the status whether the run is successful. 
+ bool CheckGraphSkipRefTypeInfoChecks(HGraph* graph) { + return CheckGraph(graph, /*check_ref_type_info=*/false); + } + private: + bool CheckGraph(HGraph* graph, bool check_ref_type_info) { + GraphChecker checker(graph); + checker.SetRefTypeInfoCheckEnabled(check_ref_type_info); + checker.Run(); + checker.Dump(std::cerr); + return checker.IsValid(); + } + std::vector<std::unique_ptr<const StandardDexFile>> dex_files_; std::unique_ptr<ArenaPoolAndAllocator> pool_and_allocator_; std::unique_ptr<VariableSizedHandleScope> handles_; @@ -194,8 +216,7 @@ class ImprovedOptimizingUnitTest : public OptimizingUnitTest { ImprovedOptimizingUnitTest() : graph_(CreateGraph()), entry_block_(nullptr), return_block_(nullptr), - exit_block_(nullptr), - parameter_(nullptr) {} + exit_block_(nullptr) {} virtual ~ImprovedOptimizingUnitTest() {} @@ -214,25 +235,21 @@ class ImprovedOptimizingUnitTest : public OptimizingUnitTest { entry_block_->AddSuccessor(return_block_); return_block_->AddSuccessor(exit_block_); - parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - DataType::Type::kInt32); - entry_block_->AddInstruction(parameter_); + CreateParameters(); + for (HInstruction* parameter : parameters_) { + entry_block_->AddInstruction(parameter); + } + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); exit_block_->AddInstruction(new (GetAllocator()) HExit()); } bool CheckGraph() { - GraphChecker checker(graph_); - checker.Run(); - if (!checker.IsValid()) { - for (const std::string& error : checker.GetErrors()) { - std::cout << error << std::endl; - } - return false; - } - return true; + return OptimizingUnitTestHelper::CheckGraph(graph_); + } + + bool CheckGraphSkipRefTypeInfoChecks() { + return OptimizingUnitTestHelper::CheckGraphSkipRefTypeInfoChecks(graph_); } HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, @@ -250,13 +267,17 @@ class ImprovedOptimizingUnitTest : public OptimizingUnitTest { } protected: + // Create parameters to be added to the graph entry block. + // Subclasses can override it to create parameters they need. + virtual void CreateParameters() { /* do nothing */ } + HGraph* graph_; HBasicBlock* entry_block_; HBasicBlock* return_block_; HBasicBlock* exit_block_; - HInstruction* parameter_; + std::vector<HInstruction*> parameters_; }; // Naive string diff data type. diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc deleted file mode 100644 index 05208ff65c..0000000000 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "pc_relative_fixups_mips.h" -#include "code_generator_mips.h" -#include "intrinsics_mips.h" - -namespace art { -namespace mips { - -/** - * Finds instructions that need the constant area base as an input. 
- */ -class PCRelativeHandlerVisitor : public HGraphVisitor { - public: - PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) - : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), - base_(nullptr) {} - - void MoveBaseIfNeeded() { - if (base_ != nullptr) { - // Bring the base closer to the first use (previously, it was in the - // entry block) and relieve some pressure on the register allocator - // while avoiding recalculation of the base in a loop. - base_->MoveBeforeFirstUserAndOutOfLoops(); - // Computing the base for PC-relative literals will clobber RA with - // the NAL instruction on R2. Take a note of this before generating - // the method entry. - codegen_->ClobberRA(); - } - } - - private: - void InitializePCRelativeBasePointer() { - // Ensure we only initialize the pointer once. - if (base_ != nullptr) { - return; - } - // Insert the base at the start of the entry block, move it to a better - // position later in MoveBaseIfNeeded(). - base_ = new (GetGraph()->GetAllocator()) HMipsComputeBaseMethodAddress(); - HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); - entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); - DCHECK(base_ != nullptr); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { - // If this is an invoke with PC-relative load kind, - // we need to add the base as the special input. - if (invoke->HasPcRelativeMethodLoadKind() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { - InitializePCRelativeBasePointer(); - // Add the special argument base to the method. - DCHECK(!invoke->HasCurrentMethodInput()); - invoke->AddSpecialInput(base_); - } - } - - void VisitLoadClass(HLoadClass* load_class) override { - HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); - switch (load_kind) { - case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageRelRo: - case HLoadClass::LoadKind::kBssEntry: - case HLoadClass::LoadKind::kJitBootImageAddress: - // Add a base register for PC-relative literals on R2. - InitializePCRelativeBasePointer(); - load_class->AddSpecialInput(base_); - break; - default: - break; - } - } - - void VisitLoadString(HLoadString* load_string) override { - HLoadString::LoadKind load_kind = load_string->GetLoadKind(); - switch (load_kind) { - case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageRelRo: - case HLoadString::LoadKind::kBssEntry: - case HLoadString::LoadKind::kJitBootImageAddress: - // Add a base register for PC-relative literals on R2. - InitializePCRelativeBasePointer(); - load_string->AddSpecialInput(base_); - break; - default: - break; - } - } - - void VisitPackedSwitch(HPackedSwitch* switch_insn) override { - if (switch_insn->GetNumEntries() <= - InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { - return; - } - // We need to replace the HPackedSwitch with a HMipsPackedSwitch in order to - // address the constant area. 
- InitializePCRelativeBasePointer(); - HGraph* graph = GetGraph(); - HBasicBlock* block = switch_insn->GetBlock(); - HMipsPackedSwitch* mips_switch = new (graph->GetAllocator()) HMipsPackedSwitch( - switch_insn->GetStartValue(), - switch_insn->GetNumEntries(), - switch_insn->InputAt(0), - base_, - switch_insn->GetDexPc()); - block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch); - } - - CodeGeneratorMIPS* codegen_; - - // The generated HMipsComputeBaseMethodAddress in the entry block needed as an - // input to the HMipsLoadFromConstantTable instructions. - HMipsComputeBaseMethodAddress* base_; -}; - -bool PcRelativeFixups::Run() { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); - if (mips_codegen->GetInstructionSetFeatures().IsR6()) { - // Do nothing for R6 because it has PC-relative addressing. - return false; - } - if (graph_->HasIrreducibleLoops()) { - // Do not run this optimization, as irreducible loops do not work with an instruction - // that can be live-in at the irreducible loop header. - return false; - } - PCRelativeHandlerVisitor visitor(graph_, codegen_); - visitor.VisitInsertionOrder(); - visitor.MoveBaseIfNeeded(); - return true; -} - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h deleted file mode 100644 index 872370bcb7..0000000000 --- a/compiler/optimizing/pc_relative_fixups_mips.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { - -class CodeGenerator; - -namespace mips { - -class PcRelativeFixups : public HOptimization { - public: - PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kPcRelativeFixupsMipsPassName, stats), - codegen_(codegen) {} - - static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips"; - - bool Run() override; - - private: - CodeGenerator* codegen_; -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index fbdbf9d086..8c4615d8c2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -160,8 +160,8 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (implicit_clinit != nullptr) { // Remove the check from the graph. It has been merged into the invoke or new-instance. check->GetBlock()->RemoveInstruction(check); - // Check if we can merge the load class as well. 
- if (can_merge_with_load_class && !load_class->HasUses()) { + // Check if we can merge the load class as well, or whether the LoadClass is now dead. + if ((can_merge_with_load_class || !load_class->CanThrow()) && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } } else if (can_merge_with_load_class && diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index b1f0a1add9..1786048b12 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -274,7 +274,7 @@ void RegisterAllocationResolver::UpdateSafepointLiveRegisters() { size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize( ArrayRef<HInstruction* const> safepoints) { size_t core_register_spill_size = codegen_->GetWordSize(); - size_t fp_register_spill_size = codegen_->GetFloatingPointSpillSlotSize(); + size_t fp_register_spill_size = codegen_->GetSlowPathFPWidth(); size_t maximum_safepoint_spill_size = 0u; for (HInstruction* instruction : safepoints) { LocationSummary* locations = instruction->GetLocations(); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index bad73e1b61..a9c217fc4f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -68,17 +68,6 @@ RegisterAllocator::~RegisterAllocator() { } } -bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, - InstructionSet instruction_set) { - return instruction_set == InstructionSet::kArm - || instruction_set == InstructionSet::kArm64 - || instruction_set == InstructionSet::kMips - || instruction_set == InstructionSet::kMips64 - || instruction_set == InstructionSet::kThumb2 - || instruction_set == InstructionSet::kX86 - || instruction_set == InstructionSet::kX86_64; -} - class AllRangesIterator : public ValueObject { public: explicit AllRangesIterator(LiveInterval* interval) diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 18ef69fcab..4d226875bf 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -60,9 +60,6 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat // intervals that intersect each other. Returns false if it failed. virtual bool Validate(bool log_fatal_on_failure) = 0; - static bool CanAllocateRegistersFor(const HGraph& graph, - InstructionSet instruction_set); - // Verifies that live intervals do not conflict. Used by unit testing. 
static bool ValidateIntervals(ArrayRef<LiveInterval* const> intervals, size_t number_of_spill_slots, diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index fdef45ec8b..f722cf91a7 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -43,34 +43,37 @@ void SchedulingGraph::AddDependency(SchedulingNode* node, } if (is_data_dependency) { - if (!HasImmediateDataDependency(node, dependency)) { - node->AddDataPredecessor(dependency); - } - } else if (!HasImmediateOtherDependency(node, dependency)) { + node->AddDataPredecessor(dependency); + } else { node->AddOtherPredecessor(dependency); } } -static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) { +bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1, + const HInstruction* instr2) { + SideEffects instr1_side_effects = instr1->GetSideEffects(); + SideEffects instr2_side_effects = instr2->GetSideEffects(); + // Read after write. - if (node.MayDependOn(other)) { + if (instr1_side_effects.MayDependOn(instr2_side_effects)) { return true; } // Write after read. - if (other.MayDependOn(node)) { + if (instr2_side_effects.MayDependOn(instr1_side_effects)) { return true; } // Memory write after write. - if (node.DoesAnyWrite() && other.DoesAnyWrite()) { + if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) { return true; } return false; } -size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const { +size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation( + HInstruction* instruction) const { DCHECK(heap_location_collector_ != nullptr); size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction); // This array access should be analyzed and added to HeapLocationCollector before. 
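
// The read-after-write / write-after-read / write-after-write rule above can
// be pictured with a minimal standalone sketch. This is only an illustration
// for the reader; `Effects` and `MustKeepOrder` are made-up names and not part
// of ART's SideEffects API.

#include <cstdint>

struct Effects {
  uint64_t reads;   // one bit per abstract memory kind that may be read
  uint64_t writes;  // one bit per abstract memory kind that may be written
};

// Two instructions must keep their relative order if one may read what the
// other writes, or if both may write the same kind of memory.
bool MustKeepOrder(const Effects& a, const Effects& b) {
  return (a.reads & b.writes) != 0u ||   // read after write
         (b.reads & a.writes) != 0u ||   // write after read
         (a.writes & b.writes) != 0u;    // write after write
}
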
@@ -78,19 +81,19 @@ size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const return heap_loc; } -bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node, - HInstruction* other) const { +bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias( + HInstruction* instr1, HInstruction* instr2) const { DCHECK(heap_location_collector_ != nullptr); - size_t node_heap_loc = ArrayAccessHeapLocation(node); - size_t other_heap_loc = ArrayAccessHeapLocation(other); + size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1); + size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2); // For example: arr[0] and arr[0] - if (node_heap_loc == other_heap_loc) { + if (instr1_heap_loc == instr2_heap_loc) { return true; } // For example: arr[0] and arr[i] - if (heap_location_collector_->MayAlias(node_heap_loc, other_heap_loc)) { + if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) { return true; } @@ -148,55 +151,55 @@ static const FieldInfo* GetFieldInfo(const HInstruction* instruction) { } } -size_t SchedulingGraph::FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const { - DCHECK(obj != nullptr); - DCHECK(field != nullptr); +size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation( + const HInstruction* instr) const { + DCHECK(instr != nullptr); + DCHECK(GetFieldInfo(instr) != nullptr); DCHECK(heap_location_collector_ != nullptr); - size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(obj, field); + size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(instr->InputAt(0), + GetFieldInfo(instr)); // This field access should be analyzed and added to HeapLocationCollector before. DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); return heap_loc; } -bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node, - const HInstruction* other) const { +bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias( + const HInstruction* instr1, const HInstruction* instr2) const { DCHECK(heap_location_collector_ != nullptr); // Static and instance field accesses should not alias. - if ((IsInstanceFieldAccess(node) && IsStaticFieldAccess(other)) || - (IsStaticFieldAccess(node) && IsInstanceFieldAccess(other))) { + if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) || + (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) { return false; } // If either of the field accesses is unresolved. - if (IsUnresolvedFieldAccess(node) || IsUnresolvedFieldAccess(other)) { + if (IsUnresolvedFieldAccess(instr1) || IsUnresolvedFieldAccess(instr2)) { // Conservatively treat these two accesses may alias. return true; } // If both fields accesses are resolved. 
- const FieldInfo* node_field = GetFieldInfo(node); - const FieldInfo* other_field = GetFieldInfo(other); - - size_t node_loc = FieldAccessHeapLocation(node->InputAt(0), node_field); - size_t other_loc = FieldAccessHeapLocation(other->InputAt(0), other_field); + size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1); + size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2); - if (node_loc == other_loc) { + if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) { return true; } - if (!heap_location_collector_->MayAlias(node_loc, other_loc)) { + if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc, + instr2_field_access_heap_loc)) { return false; } return true; } -bool SchedulingGraph::HasMemoryDependency(HInstruction* node, - HInstruction* other) const { - if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) { +bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency( + HInstruction* instr1, HInstruction* instr2) const { + if (!HasReorderingDependency(instr1, instr2)) { return false; } @@ -208,35 +211,35 @@ bool SchedulingGraph::HasMemoryDependency(HInstruction* node, return true; } - if (IsArrayAccess(node) && IsArrayAccess(other)) { - return ArrayAccessMayAlias(node, other); + if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) { + return ArrayAccessMayAlias(instr1, instr2); } - if (IsFieldAccess(node) && IsFieldAccess(other)) { - return FieldAccessMayAlias(node, other); + if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) { + return FieldAccessMayAlias(instr1, instr2); } // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess - if (node->IsVecMemoryOperation() && other->IsVecMemoryOperation()) { + if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) { return true; } - if (node->IsVecMemoryOperation() && IsArrayAccess(other)) { + if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) { return true; } - if (IsArrayAccess(node) && other->IsVecMemoryOperation()) { + if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) { return true; } // Heap accesses of different kinds should not alias. 
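
// For illustration only: a simplified standalone model of the field-aliasing
// decision implemented by FieldAccessMayAlias() above. `FieldKind`,
// `FieldAccess` and `locations_may_alias` are made-up names, not ART types;
// the real code queries the HeapLocationCollector instead.

enum class FieldKind { kInstance, kStatic, kUnresolved };

struct FieldAccess {
  FieldKind kind;
  int heap_location;  // index assigned by a heap location analysis
};

bool FieldsMayAlias(const FieldAccess& a,
                    const FieldAccess& b,
                    bool (*locations_may_alias)(int, int)) {
  // Static and instance fields live in different storage and never alias.
  if ((a.kind == FieldKind::kInstance && b.kind == FieldKind::kStatic) ||
      (a.kind == FieldKind::kStatic && b.kind == FieldKind::kInstance)) {
    return false;
  }
  // If either access could not be resolved, conservatively assume aliasing.
  if (a.kind == FieldKind::kUnresolved || b.kind == FieldKind::kUnresolved) {
    return true;
  }
  // Two resolved accesses alias if they touch the same heap location or the
  // analysis says their locations may alias.
  return a.heap_location == b.heap_location ||
         locations_may_alias(a.heap_location, b.heap_location);
}
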
- if (IsArrayAccess(node) && IsFieldAccess(other)) { + if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) { return false; } - if (IsFieldAccess(node) && IsArrayAccess(other)) { + if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) { return false; } - if (node->IsVecMemoryOperation() && IsFieldAccess(other)) { + if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) { return false; } - if (IsFieldAccess(node) && other->IsVecMemoryOperation()) { + if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) { return false; } @@ -245,15 +248,15 @@ bool SchedulingGraph::HasMemoryDependency(HInstruction* node, return true; } -bool SchedulingGraph::HasExceptionDependency(const HInstruction* node, - const HInstruction* other) const { - if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) { +bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1, + const HInstruction* instr2) { + if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) { return true; } - if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) { + if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) { return true; } - if (other->CanThrow() && node->CanThrow()) { + if (instr2->CanThrow() && instr1->CanThrow()) { return true; } @@ -262,24 +265,6 @@ bool SchedulingGraph::HasExceptionDependency(const HInstruction* node, return false; } -// Check whether `node` depends on `other`, taking into account `SideEffect` -// information and `CanThrow` information. -bool SchedulingGraph::HasSideEffectDependency(HInstruction* node, - HInstruction* other) const { - if (HasMemoryDependency(node, other)) { - return true; - } - - // Even if above memory dependency check has passed, it is still necessary to - // check dependencies between instructions that can throw and instructions - // that write to memory. - if (HasExceptionDependency(node, other)) { - return true; - } - - return false; -} - // Check if the specified instruction is a better candidate which more likely will // have other instructions depending on it. static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate, @@ -297,8 +282,39 @@ static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candid } } -void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) { - SchedulingNode* instruction_node = GetNode(instruction); +void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) { + for (HInstruction* instruction : node->GetInstruction()->GetInputs()) { + // Having a phi-function from a loop header as an input means the current node of the + // scheduling graph has a cross-iteration dependency because such phi-functions bring values + // from the previous iteration to the current iteration. + if (!instruction->IsLoopHeaderPhi()) { + continue; + } + for (HInstruction* phi_input : instruction->GetInputs()) { + // As a scheduling graph of the current basic block is built by + // processing instructions bottom-up, nullptr returned by GetNode means + // an instruction defining a value for the phi is either before the + // instruction represented by node or it is in a different basic block. + SchedulingNode* def_node = GetNode(phi_input); + + // We don't create a dependency if there are uses besides the use in phi. + // In such cases a register to hold phi_input is usually allocated and + // a MOV instruction is generated. 
In cases with multiple uses and no MOV
+      // instruction, reordering that creates a MOV instruction can improve
+      // performance more than an attempt to avoid a MOV instruction.
+      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
+        // We have an implicit data dependency between node and def_node.
+        // AddDataDependency cannot be used because it is for explicit data dependencies.
+        // So AddOtherDependency is used.
+        AddOtherDependency(def_node, node);
+      }
+    }
+  }
+}
+
+void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
+                                      bool is_scheduling_barrier) {
+  HInstruction* instruction = instruction_node->GetInstruction();
 
   // Define-use dependencies.
   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
@@ -354,12 +370,16 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul
       if (other_node->IsSchedulingBarrier()) {
         // We have reached a scheduling barrier so we can stop further
         // processing.
-        DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
+        //
+        // As an "other" dependency is not set up if a data dependency exists, we need to check
+        // that one of them exists.
+        DCHECK(other_node->HasOtherDependency(instruction_node)
+               || other_node->HasDataDependency(instruction_node));
         break;
       }
-      if (HasSideEffectDependency(other, instruction)) {
+      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
         if (dep_chain_candidate != nullptr &&
-            HasSideEffectDependency(other, dep_chain_candidate)) {
+            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
           // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
         } else {
           AddOtherDependency(other_node, instruction_node);
@@ -386,44 +406,8 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul
       AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
     }
   }
-}
-
-bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
-                                                 const SchedulingNode* other) const {
-  return ContainsElement(node->GetDataPredecessors(), other);
-}
-bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
-                                                 const HInstruction* other_instruction) const {
-  const SchedulingNode* node = GetNode(instruction);
-  const SchedulingNode* other = GetNode(other_instruction);
-  if (node == nullptr || other == nullptr) {
-    // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their
-    // corresponding SchedulingNode in the graph, and tell whether there is a dependency.
-    // Otherwise there is no dependency from SchedulingGraph's perspective, for example,
-    // instruction and other_instruction are in different basic blocks.
-    return false;
-  }
-  return HasImmediateDataDependency(node, other);
-}
-
-bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
-                                                  const SchedulingNode* other) const {
-  return ContainsElement(node->GetOtherPredecessors(), other);
-}
-
-bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
-                                                  const HInstruction* other_instruction) const {
-  const SchedulingNode* node = GetNode(instruction);
-  const SchedulingNode* other = GetNode(other_instruction);
-  if (node == nullptr || other == nullptr) {
-    // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their
-    // corresponding SchedulingNode in the graph, and tell whether there is a dependency.
- // Otherwise there is no dependency from SchedulingGraph's perspective, for example, - // instruction and other_instruction are in different basic blocks. - return false; - } - return HasImmediateOtherDependency(node, other); + AddCrossIterationDependencies(instruction_node); } static const std::string InstructionTypeId(const HInstruction* instruction) { @@ -594,7 +578,7 @@ void HScheduler::Schedule(HBasicBlock* block, ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler)); // Build the scheduling graph. - SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector); + SchedulingGraph scheduling_graph(&allocator, heap_location_collector); for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index d2dbeca924..f7180a02d7 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -21,6 +21,7 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "base/stl_util.h" #include "base/time_utils.h" #include "code_generator.h" #include "load_store_analysis.h" @@ -168,6 +169,10 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { } void AddDataPredecessor(SchedulingNode* predecessor) { + // Check whether the predecessor has been added earlier. + if (HasDataDependency(predecessor)) { + return; + } data_predecessors_.push_back(predecessor); predecessor->num_unscheduled_successors_++; } @@ -177,6 +182,12 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { } void AddOtherPredecessor(SchedulingNode* predecessor) { + // Check whether the predecessor has been added earlier. + // As an optimization of the scheduling graph, we don't need to create another dependency if + // there is a data dependency between scheduling nodes. + if (HasOtherDependency(predecessor) || HasDataDependency(predecessor)) { + return; + } other_predecessors_.push_back(predecessor); predecessor->num_unscheduled_successors_++; } @@ -205,6 +216,14 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { uint32_t GetCriticalPath() const { return critical_path_; } bool IsSchedulingBarrier() const { return is_scheduling_barrier_; } + bool HasDataDependency(const SchedulingNode* node) const { + return ContainsElement(data_predecessors_, node); + } + + bool HasOtherDependency(const SchedulingNode* node) const { + return ContainsElement(other_predecessors_, node); + } + private: // The latency of this node. It represents the latency between the moment the // last instruction for this node has executed to the moment the result @@ -246,18 +265,67 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { }; /* + * Provide analysis of instruction dependencies (side effects) which are not in a form of explicit + * def-use data dependencies. 
+ */ +class SideEffectDependencyAnalysis { + public: + explicit SideEffectDependencyAnalysis(const HeapLocationCollector* heap_location_collector) + : memory_dependency_analysis_(heap_location_collector) {} + + bool HasSideEffectDependency(HInstruction* instr1, HInstruction* instr2) const { + if (memory_dependency_analysis_.HasMemoryDependency(instr1, instr2)) { + return true; + } + + // Even if above memory dependency check has passed, it is still necessary to + // check dependencies between instructions that can throw and instructions + // that write to memory. + if (HasExceptionDependency(instr1, instr2)) { + return true; + } + + return false; + } + + private: + static bool HasExceptionDependency(const HInstruction* instr1, const HInstruction* instr2); + static bool HasReorderingDependency(const HInstruction* instr1, const HInstruction* instr2); + + /* + * Memory dependency analysis of instructions based on their memory side effects + * and heap location information from the LCA pass if it is provided. + */ + class MemoryDependencyAnalysis { + public: + explicit MemoryDependencyAnalysis(const HeapLocationCollector* heap_location_collector) + : heap_location_collector_(heap_location_collector) {} + + bool HasMemoryDependency(HInstruction* instr1, HInstruction* instr2) const; + + private: + bool ArrayAccessMayAlias(HInstruction* instr1, HInstruction* instr2) const; + bool FieldAccessMayAlias(const HInstruction* instr1, const HInstruction* instr2) const; + size_t ArrayAccessHeapLocation(HInstruction* instruction) const; + size_t FieldAccessHeapLocation(const HInstruction* instruction) const; + + const HeapLocationCollector* const heap_location_collector_; + }; + + MemoryDependencyAnalysis memory_dependency_analysis_; +}; + +/* * Directed acyclic graph for scheduling. 
 */
class SchedulingGraph : public ValueObject {
 public:
-  SchedulingGraph(const HScheduler* scheduler,
-                  ScopedArenaAllocator* allocator,
+  SchedulingGraph(ScopedArenaAllocator* allocator,
                   const HeapLocationCollector* heap_location_collector)
-      : scheduler_(scheduler),
-        allocator_(allocator),
+      : allocator_(allocator),
         contains_scheduling_barrier_(false),
         nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
-        heap_location_collector_(heap_location_collector) {}
+        side_effect_dependency_analysis_(heap_location_collector) {}
 
   SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
     std::unique_ptr<SchedulingNode> node(
@@ -265,7 +333,7 @@ class SchedulingGraph : public ValueObject {
     SchedulingNode* result = node.get();
     nodes_map_.insert(std::make_pair(instr, std::move(node)));
     contains_scheduling_barrier_ |= is_scheduling_barrier;
-    AddDependencies(instr, is_scheduling_barrier);
+    AddDependencies(result, is_scheduling_barrier);
     return result;
   }
 
@@ -278,13 +346,6 @@ class SchedulingGraph : public ValueObject {
     }
   }
 
-  bool IsSchedulingBarrier(const HInstruction* instruction) const;
-
-  bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const;
-  bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const;
-  bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const;
-  bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
-
   size_t Size() const {
     return nodes_map_.size();
   }
@@ -302,26 +363,33 @@ class SchedulingGraph : public ValueObject {
   void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
     AddDependency(node, dependency, /*is_data_dependency*/false);
   }
-  bool HasMemoryDependency(HInstruction* node, HInstruction* other) const;
-  bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const;
-  bool HasSideEffectDependency(HInstruction* node, HInstruction* other) const;
-  bool ArrayAccessMayAlias(HInstruction* node, HInstruction* other) const;
-  bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const;
-  size_t ArrayAccessHeapLocation(HInstruction* instruction) const;
-  size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const;
-  // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects.
-  void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false);
+  // Analyze whether the scheduling node has cross-iteration dependencies, i.e. whether it uses
+  // values defined in the previous iteration.
+  //
+  // Supported cases:
+  //
+  //    L:
+  //      v2 = loop_head_phi(v1)
+  //      instr1(v2)
+  //      v1 = instr2
+  //      goto L
+  //
+  // In such cases moving instr2 before instr1 creates intersecting live ranges
+  // of v1 and v2. As a result, a separate register is needed to keep the value
+  // defined by instr2, which is only used on the next iteration.
+  // If instr2 is not moved, no additional register is needed. The register
+  // used by instr1 is reused.
+  // To prevent such a situation, an "other" dependency between instr1 and instr2 must be set.
+  void AddCrossIterationDependencies(SchedulingNode* node);
 
-  const HScheduler* const scheduler_;
+  // Add dependency nodes for the given `SchedulingNode`: inputs, environments, and side-effects.
+ void AddDependencies(SchedulingNode* node, bool is_scheduling_barrier = false); ScopedArenaAllocator* const allocator_; - bool contains_scheduling_barrier_; - ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; - - const HeapLocationCollector* const heap_location_collector_; + SideEffectDependencyAnalysis side_effect_dependency_analysis_; }; /* @@ -477,10 +545,6 @@ class HScheduler { DISALLOW_COPY_AND_ASSIGN(HScheduler); }; -inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const { - return scheduler_->IsSchedulingBarrier(instruction); -} - class HInstructionScheduling : public HOptimization { public: HInstructionScheduling(HGraph* graph, diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index e0e265a04c..7835b1d3d3 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -54,12 +54,6 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #ifdef ART_ENABLE_CODEGEN_x86_64 CodegenTargetConfig(InstructionSet::kX86_64, create_codegen_x86_64), #endif -#ifdef ART_ENABLE_CODEGEN_mips - CodegenTargetConfig(InstructionSet::kMips, create_codegen_mips), -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - CodegenTargetConfig(InstructionSet::kMips64, create_codegen_mips64) -#endif }; for (const CodegenTargetConfig& test_config : test_config_candidates) { @@ -146,9 +140,7 @@ class SchedulerTest : public OptimizingUnitTest { environment->SetRawEnvAt(1, mul); mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - SchedulingGraph scheduling_graph(scheduler, - GetScopedAllocator(), - /* heap_location_collector= */ nullptr); + TestSchedulingGraph scheduling_graph(GetScopedAllocator()); // Instructions must be inserted in reverse order into the scheduling graph. for (HInstruction* instr : ReverseRange(block_instructions)) { scheduling_graph.AddNode(instr); @@ -283,7 +275,7 @@ class SchedulerTest : public OptimizingUnitTest { HeapLocationCollector heap_location_collector(graph_); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector); + TestSchedulingGraph scheduling_graph(GetScopedAllocator(), &heap_location_collector); for (HInstruction* instr : ReverseRange(block_instructions)) { // Build scheduling graph with memory access aliasing information @@ -357,6 +349,41 @@ class SchedulerTest : public OptimizingUnitTest { scheduler->Schedule(graph_); } + class TestSchedulingGraph : public SchedulingGraph { + public: + explicit TestSchedulingGraph(ScopedArenaAllocator* allocator, + const HeapLocationCollector *heap_location_collector = nullptr) + : SchedulingGraph(allocator, heap_location_collector) {} + + bool HasImmediateDataDependency(const HInstruction* instruction, + const HInstruction* other_instruction) const { + const SchedulingNode* node = GetNode(instruction); + const SchedulingNode* other = GetNode(other_instruction); + if (node == nullptr || other == nullptr) { + // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their + // corresponding SchedulingNode in the graph, and tell whether there is a dependency. + // Otherwise there is no dependency from SchedulingGraph's perspective, for example, + // instruction and other_instruction are in different basic blocks. 
+ return false; + } + return node->HasDataDependency(other); + } + + bool HasImmediateOtherDependency(const HInstruction* instruction, + const HInstruction* other_instruction) const { + const SchedulingNode* node = GetNode(instruction); + const SchedulingNode* other = GetNode(other_instruction); + if (node == nullptr || other == nullptr) { + // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their + // corresponding SchedulingNode in the graph, and tell whether there is a dependency. + // Otherwise there is no dependency from SchedulingGraph's perspective, for example, + // instruction and other_instruction are in different basic blocks. + return false; + } + return node->HasOtherDependency(other); + } + }; + HGraph* graph_; }; diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc index 6e6549737c..6e68c6c80d 100644 --- a/compiler/optimizing/select_generator_test.cc +++ b/compiler/optimizing/select_generator_test.cc @@ -25,6 +25,14 @@ namespace art { class SelectGeneratorTest : public ImprovedOptimizingUnitTest { + private: + void CreateParameters() override { + parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32)); + } + public: void ConstructBasicGraphForSelect(HInstruction* instr) { HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); @@ -75,10 +83,10 @@ class SelectGeneratorTest : public ImprovedOptimizingUnitTest { // HDivZeroCheck might throw and should not be hoisted from the conditional to an unconditional. TEST_F(SelectGeneratorTest, testZeroCheck) { InitGraph(); - HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0); + HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameters_[0], 0); ConstructBasicGraphForSelect(instr); - ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)}, + ArenaVector<HInstruction*> current_locals({parameters_[0], graph_->GetIntConstant(1)}, GetAllocator()->Adapter(kArenaAllocInstruction)); ManuallyBuildEnvFor(instr, ¤t_locals); @@ -88,7 +96,9 @@ TEST_F(SelectGeneratorTest, testZeroCheck) { // Test that SelectGenerator succeeds with HAdd. 
TEST_F(SelectGeneratorTest, testAdd) { InitGraph(); - HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0); + HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, + parameters_[0], + parameters_[0], 0); ConstructBasicGraphForSelect(instr); EXPECT_TRUE(CheckGraphAndTrySelectGenerator()); } diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 8637db13ad..b8471e3721 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -19,6 +19,7 @@ #include "art_method-inl.h" #include "base/casts.h" #include "base/enums.h" +#include "base/logging.h" #include "class_linker.h" #include "code_generator.h" #include "driver/compiler_options.h" @@ -26,29 +27,29 @@ #include "gc/heap.h" #include "gc/space/image_space.h" #include "handle_scope-inl.h" +#include "jit/jit.h" #include "mirror/dex_cache.h" #include "mirror/string.h" #include "nodes.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" -#include "utils/dex_cache_arrays_layout-inl.h" namespace art { static bool IsInBootImage(ArtMethod* method) { - const std::vector<gc::space::ImageSpace*>& image_spaces = - Runtime::Current()->GetHeap()->GetBootImageSpaces(); - for (gc::space::ImageSpace* image_space : image_spaces) { - const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection(); - if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) { - return true; - } - } - return false; + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK_EQ(heap->IsBootImageAddress(method), + std::any_of(heap->GetBootImageSpaces().begin(), + heap->GetBootImageSpaces().end(), + [=](gc::space::ImageSpace* space) REQUIRES_SHARED(Locks::mutator_lock_) { + return space->GetImageHeader().GetMethodsSection().Contains( + reinterpret_cast<uint8_t*>(method) - space->Begin()); + })); + return heap->IsBootImageAddress(method); } static bool BootImageAOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& compiler_options) { - DCHECK(compiler_options.IsBootImage()); + DCHECK(compiler_options.IsBootImage() || compiler_options.IsBootImageExtension()); ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); DCHECK(klass != nullptr); @@ -86,10 +87,13 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenInvokeStaticOrDirect( // Recursive call. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; - } else if (compiler_options.IsBootImage()) { + } else if (compiler_options.IsBootImage() || compiler_options.IsBootImageExtension()) { if (!compiler_options.GetCompilePic()) { // Test configuration, do not sharpen. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; + } else if (IsInBootImage(callee)) { + DCHECK(compiler_options.IsBootImageExtension()); + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; } else if (BootImageAOTCanEmbedMethod(callee, compiler_options)) { method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; } else { @@ -98,11 +102,18 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenInvokeStaticOrDirect( } code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else if (Runtime::Current()->UseJitCompilation()) { - // JIT or on-device AOT compilation referencing a boot image method. - // Use the method address directly. 
- method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress; - method_load_data = reinterpret_cast<uintptr_t>(callee); - code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + ScopedObjectAccess soa(Thread::Current()); + if (Runtime::Current()->GetJit()->CanEncodeMethod( + callee, + codegen->GetGraph()->IsCompilingForSharedJitCode())) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress; + method_load_data = reinterpret_cast<uintptr_t>(callee); + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else { + // Do not sharpen. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } } else if (IsInBootImage(callee)) { // Use PC-relative access to the .data.bimg.rel.ro methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; @@ -152,19 +163,22 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid; Runtime* runtime = Runtime::Current(); const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); - if (compiler_options.IsBootImage()) { - // Compiling boot image. Check if the class is a boot image class. + if (compiler_options.IsBootImage() || compiler_options.IsBootImageExtension()) { + // Compiling boot image or boot image extension. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); if (!compiler_options.GetCompilePic()) { // Test configuration, do not sharpen. desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; + } else if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get())) { + DCHECK(compiler_options.IsBootImageExtension()); + is_in_boot_image = true; + desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo; } else if ((klass != nullptr) && compiler_options.IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { // Not a boot image class. - DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadClass::LoadKind::kBssEntry; } } else { @@ -175,7 +189,16 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( if (is_in_boot_image) { desired_load_kind = HLoadClass::LoadKind::kJitBootImageAddress; } else if (klass != nullptr) { - desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; + if (runtime->GetJit()->CanEncodeClass( + klass.Get(), + codegen->GetGraph()->IsCompilingForSharedJitCode())) { + desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; + } else { + // Shared JIT code cannot encode a literal that the GC can move. + VLOG(jit) << "Unable to encode in shared region class literal: " + << klass->PrettyClass(); + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; + } } else { // Class not loaded yet. This happens when the dex code requesting // this `HLoadClass` hasn't been executed in the interpreter. @@ -299,12 +322,11 @@ void HSharpening::ProcessLoadString( ObjPtr<mirror::String> string = nullptr; const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); - if (compiler_options.IsBootImage()) { - // Compiling boot image. Resolve the string and allocate it if needed, to ensure - // the string will be added to the boot image. 
+ if (compiler_options.IsBootImage() || compiler_options.IsBootImageExtension()) { + // Compiling boot image or boot image extension. Resolve the string and allocate it + // if needed, to ensure the string will be added to the boot image. DCHECK(!runtime->UseJitCompilation()); if (compiler_options.GetCompilePic()) { - DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); if (compiler_options.IsForceDeterminism()) { // Strings for methods we're compiling should be pre-resolved but Strings in inlined // methods may not be if these inlined methods are not in the boot image profile. @@ -319,7 +341,12 @@ void HSharpening::ProcessLoadString( CHECK(string != nullptr); } if (string != nullptr) { - desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + DCHECK(compiler_options.IsBootImageExtension()); + desired_load_kind = HLoadString::LoadKind::kBootImageRelRo; + } else { + desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + } } else { desired_load_kind = HLoadString::LoadKind::kBssEntry; } @@ -331,10 +358,18 @@ void HSharpening::ProcessLoadString( DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr) { - if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + gc::Heap* heap = runtime->GetHeap(); + if (heap->ObjectIsInBootImageSpace(string)) { desired_load_kind = HLoadString::LoadKind::kJitBootImageAddress; - } else { + } else if (runtime->GetJit()->CanEncodeString( + string, + codegen->GetGraph()->IsCompilingForSharedJitCode())) { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; + } else { + // Shared JIT code cannot encode a literal that the GC can move. + VLOG(jit) << "Unable to encode in shared region string literal: " + << string->ToModifiedUtf8(); + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else { desired_load_kind = HLoadString::LoadKind::kRuntimeCall; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 0d0e1ecf1f..a5e8ff65a9 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -496,6 +496,22 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { } } +static bool HasPhiEquivalentAtLoopEntry(HGraph* graph) { + // Phi equivalents for a dex register do not work with OSR, as the phis will + // receive two different stack slots but only one is recorded in the stack + // map. + for (HBasicBlock* block : graph->GetReversePostOrder()) { + if (block->IsLoopHeader()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + if (it.Current()->AsPhi()->HasEquivalentPhi()) { + return true; + } + } + } + } + return false; +} + GraphAnalysisResult SsaBuilder::BuildSsa() { DCHECK(!graph_->IsInSsaForm()); @@ -574,6 +590,10 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // other optimizations. 
RemoveRedundantUninitializedStrings(); + if (graph_->IsCompilingOsr() && HasPhiEquivalentAtLoopEntry(graph_)) { + return kAnalysisFailPhiEquivalentInOsr; + } + graph_->SetInSsaForm(); return kAnalysisSuccess; } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index c88390775c..3ea2815e64 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -19,11 +19,11 @@ #include <iostream> +#include "base/intrusive_forward_list.h" #include "base/iteration_range.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "nodes.h" -#include "utils/intrusive_forward_list.h" namespace art { diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 60ca61c133..dd6d1a2959 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -42,7 +42,8 @@ void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offs void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, - uint32_t num_dex_registers) { + uint32_t num_dex_registers, + bool baseline) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -52,6 +53,16 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, core_spill_mask_ = core_spill_mask; fp_spill_mask_ = fp_spill_mask; num_dex_registers_ = num_dex_registers; + baseline_ = baseline; + + if (kVerifyStackMaps) { + dchecks_.emplace_back([=](const CodeInfo& code_info) { + DCHECK_EQ(code_info.packed_frame_size_, frame_size_in_bytes / kStackAlignment); + DCHECK_EQ(code_info.core_spill_mask_, core_spill_mask); + DCHECK_EQ(code_info.fp_spill_mask_, fp_spill_mask); + DCHECK_EQ(code_info.number_of_dex_registers_, num_dex_registers); + }); + } } void StackMapStream::EndMethod() { @@ -72,7 +83,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, uint32_t register_mask, BitVector* stack_mask, - StackMap::Kind kind) { + StackMap::Kind kind, + bool needs_vreg_info) { DCHECK(in_method_) << "Call BeginMethod first"; DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; in_stack_map_ = true; @@ -105,7 +117,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, lazy_stack_masks_.push_back(stack_mask); current_inline_infos_.clear(); current_dex_registers_.clear(); - expected_num_dex_registers_ = num_dex_registers_; + expected_num_dex_registers_ = needs_vreg_info ? num_dex_registers_ : 0u; if (kVerifyStackMaps) { size_t stack_map_index = stack_maps_.size(); @@ -284,34 +296,39 @@ void StackMapStream::CreateDexRegisterMap() { } } -template<typename Writer, typename Builder> -ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) { - out.WriteBit(false); // Is not deduped. - bit_table.Encode(out); -} - ScopedArenaVector<uint8_t> StackMapStream::Encode() { DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; + uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; + flags |= baseline_ ? CodeInfo::kIsBaseline : 0; + uint32_t bit_table_flags = 0; + ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { + if (bit_table->size() != 0) { // Record which bit-tables are stored. 
+ bit_table_flags |= 1 << i; + } + }); + ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream)); BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer); - out.WriteVarint(packed_frame_size_); - out.WriteVarint(core_spill_mask_); - out.WriteVarint(fp_spill_mask_); - out.WriteVarint(num_dex_registers_); - EncodeTable(out, stack_maps_); - EncodeTable(out, register_masks_); - EncodeTable(out, stack_masks_); - EncodeTable(out, inline_infos_); - EncodeTable(out, method_infos_); - EncodeTable(out, dex_register_masks_); - EncodeTable(out, dex_register_maps_); - EncodeTable(out, dex_register_catalog_); + out.WriteInterleavedVarints(std::array<uint32_t, CodeInfo::kNumHeaders>{ + flags, + packed_frame_size_, + core_spill_mask_, + fp_spill_mask_, + num_dex_registers_, + bit_table_flags, + }); + ForEachBitTable([&out](size_t, auto bit_table) { + if (bit_table->size() != 0) { // Skip empty bit-tables. + bit_table->Encode(out); + } + }); // Verify that we can load the CodeInfo and check some essentials. - CodeInfo code_info(buffer.data()); - CHECK_EQ(code_info.Size(), buffer.size()); + size_t number_of_read_bits; + CodeInfo code_info(buffer.data(), &number_of_read_bits); + CHECK_EQ(number_of_read_bits, out.NumberOfWrittenBits()); CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); // Verify all written data (usually only in debug builds). diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 01c6bf9e0e..67f716ce70 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -40,10 +40,10 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { : allocator_(allocator), instruction_set_(instruction_set), stack_maps_(allocator), - inline_infos_(allocator), - method_infos_(allocator), register_masks_(allocator), stack_masks_(allocator), + inline_infos_(allocator), + method_infos_(allocator), dex_register_masks_(allocator), dex_register_maps_(allocator), dex_register_catalog_(allocator), @@ -61,14 +61,16 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { void BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, - uint32_t num_dex_registers); + uint32_t num_dex_registers, + bool baseline = false); void EndMethod(); void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, uint32_t register_mask = 0, BitVector* sp_mask = nullptr, - StackMap::Kind kind = StackMap::Kind::Default); + StackMap::Kind kind = StackMap::Kind::Default, + bool needs_vreg_info = true); void EndStackMapEntry(); void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { @@ -97,17 +99,33 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { void CreateDexRegisterMap(); + // Invokes the callback with pointer of each BitTableBuilder field. 
+ template<typename Callback> + void ForEachBitTable(Callback callback) { + size_t index = 0; + callback(index++, &stack_maps_); + callback(index++, ®ister_masks_); + callback(index++, &stack_masks_); + callback(index++, &inline_infos_); + callback(index++, &method_infos_); + callback(index++, &dex_register_masks_); + callback(index++, &dex_register_maps_); + callback(index++, &dex_register_catalog_); + CHECK_EQ(index, CodeInfo::kNumBitTables); + } + ScopedArenaAllocator* allocator_; const InstructionSet instruction_set_; uint32_t packed_frame_size_ = 0; uint32_t core_spill_mask_ = 0; uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; + bool baseline_; BitTableBuilder<StackMap> stack_maps_; - BitTableBuilder<InlineInfo> inline_infos_; - BitTableBuilder<MethodInfo> method_infos_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; + BitTableBuilder<InlineInfo> inline_infos_; + BitTableBuilder<MethodInfo> method_infos_; BitmapTableBuilder dex_register_masks_; BitTableBuilder<DexRegisterMapInfo> dex_register_maps_; BitTableBuilder<DexRegisterInfo> dex_register_catalog_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index d28f09fbba..0dd5773cf6 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -689,10 +689,6 @@ TEST(StackMapTest, PackedNativePcTest) { StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86); uint32_t packed_x86_64 = StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64); - uint32_t packed_mips = - StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips); - uint32_t packed_mips64 = - StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64); EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2), kThumb2InstructionAlignment); EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64), @@ -701,10 +697,6 @@ TEST(StackMapTest, PackedNativePcTest) { kX86InstructionAlignment); EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64), kX86_64InstructionAlignment); - EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips), - kMipsInstructionAlignment); - EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64), - kMips64InstructionAlignment); } TEST(StackMapTest, TestDeduplicateStackMask) { diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index aa19de683f..ddcf154f99 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -31,6 +31,14 @@ using HEdgeSet = SuperblockCloner::HEdgeSet; // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. class SuperblockClonerTest : public ImprovedOptimizingUnitTest { + private: + void CreateParameters() override { + parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32)); + } + public: void CreateBasicLoopControlFlow(HBasicBlock* position, HBasicBlock* successor, @@ -75,7 +83,7 @@ class SuperblockClonerTest : public ImprovedOptimizingUnitTest { loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check)); // Loop body block. 
- HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc); + HInstruction* null_check = new (GetAllocator()) HNullCheck(parameters_[0], dex_pc); HInstruction* array_length = new (GetAllocator()) HArrayLength(null_check, dex_pc); HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(phi, array_length, dex_pc); HInstruction* array_get = @@ -100,7 +108,7 @@ class SuperblockClonerTest : public ImprovedOptimizingUnitTest { graph_->SetHasBoundsChecks(true); // Adjust HEnvironment for each instruction which require that. - ArenaVector<HInstruction*> current_locals({phi, const_128, parameter_}, + ArenaVector<HInstruction*> current_locals({phi, const_128, parameters_[0]}, GetAllocator()->Adapter(kArenaAllocInstruction)); HEnvironment* env = ManuallyBuildEnvFor(suspend_check, ¤t_locals); @@ -421,7 +429,7 @@ TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { if_block->AddSuccessor(temp1); temp1->AddSuccessor(header); - if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + if_block->AddInstruction(new (GetAllocator()) HIf(parameters_[0])); HInstructionIterator it(header->GetPhis()); DCHECK(!it.Done()); @@ -586,7 +594,7 @@ TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) { // Change the loop3 - insert an exit which leads to loop1. HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop3_extra_if_block); - loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameters_[0])); loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block); loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit. |
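
// A hedged sketch, not part of this change: any other fixture derived from
// ImprovedOptimizingUnitTest that needs several parameters can override
// CreateParameters() in the same way as SelectGeneratorTest and
// SuperblockClonerTest above. `TwoParameterTest` is a hypothetical name.

class TwoParameterTest : public ImprovedOptimizingUnitTest {
 private:
  void CreateParameters() override {
    for (size_t i = 0; i < 2u; ++i) {
      parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
                                                                 dex::TypeIndex(0),
                                                                 i,
                                                                 DataType::Type::kInt32));
    }
  }
};

// After InitGraph() the parameters are available as parameters_[0] and
// parameters_[1], already added to the entry block.
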