Diffstat (limited to 'compiler/optimizing')
41 files changed, 4080 insertions, 568 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d2357a5d05..7dc094b25f 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -548,7 +548,21 @@ class BCEVisitor : public HGraphVisitor { void VisitBasicBlock(HBasicBlock* block) OVERRIDE { DCHECK(!IsAddedBlock(block)); first_index_bounds_check_map_.clear(); - HGraphVisitor::VisitBasicBlock(block); + // Visit phis and instructions using a safe iterator. The iteration protects + // against deleting the current instruction during iteration. However, it + // must advance next_ if that instruction is deleted during iteration. + for (HInstruction* instruction = block->GetFirstPhi(); instruction != nullptr;) { + DCHECK(instruction->IsInBlock()); + next_ = instruction->GetNext(); + instruction->Accept(this); + instruction = next_; + } + for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { + DCHECK(instruction->IsInBlock()); + next_ = instruction->GetNext(); + instruction->Accept(this); + instruction = next_; + } // We should never deoptimize from an osr method, otherwise we might wrongly optimize // code dominated by the deoptimization. if (!GetGraph()->IsCompilingOsr()) { @@ -1798,7 +1812,12 @@ class BCEVisitor : public HGraphVisitor { } /** Helper method to replace an instruction with another instruction. */ - static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + // Safe iteration. + if (instruction == next_) { + next_ = next_->GetNext(); + } + // Replace and remove. instruction->ReplaceWith(replacement); instruction->GetBlock()->RemoveInstruction(instruction); } @@ -1831,6 +1850,9 @@ class BCEVisitor : public HGraphVisitor { // Range analysis based on induction variables. InductionVarRange induction_range_; + // Safe iteration. 
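// A minimal sketch of the safe-iteration contract this field supports
// (illustrative only; names match the BCEVisitor code above): the loop caches
// the successor before visiting, and any deletion routed through
// ReplaceInstruction() advances the cache, so the iterator never calls
// GetNext() on an instruction that has already been removed.
//
//   for (HInstruction* insn = block->GetFirstInstruction(); insn != nullptr;) {
//     next_ = insn->GetNext();   // cache the successor before it can be deleted
//     insn->Accept(this);        // may call ReplaceInstruction(insn) or delete next_
//     insn = next_;              // still valid: ReplaceInstruction bumped next_
//   }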
+ HInstruction* next_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h index 6dfffce117..133afa47fe 100644 --- a/compiler/optimizing/bytecode_utils.h +++ b/compiler/optimizing/bytecode_utils.h @@ -26,7 +26,8 @@ namespace art { class CodeItemIterator : public ValueObject { public: - CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u) + explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {} + CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc) : code_ptr_(code_item.insns_ + start_dex_pc), code_end_(code_item.insns_ + code_item.insns_size_in_code_units_), dex_pc_(start_dex_pc) {} diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 8b450e11dc..a5f248dd20 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1375,4 +1375,30 @@ uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const { return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); } +void CodeGenerator::EmitJitRoots(uint8_t* code, + Handle<mirror::ObjectArray<mirror::Object>> roots, + const uint8_t* roots_data, + Handle<mirror::DexCache> outer_dex_cache) { + DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); + StackHandleScope<1> hs(Thread::Current()); + MutableHandle<mirror::DexCache> h_dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + size_t index = 0; + for (auto& entry : jit_string_roots_) { + const DexFile& entry_dex_file = *entry.first.dex_file; + // Avoid the expensive FindDexCache call by checking if the string is + // in the compiled method's dex file. + h_dex_cache.Assign(IsSameDexFile(*outer_dex_cache->GetDexFile(), entry_dex_file) + ? outer_dex_cache.Get() + : class_linker->FindDexCache(hs.Self(), entry_dex_file)); + mirror::String* string = class_linker->LookupString( + entry_dex_file, entry.first.string_index, h_dex_cache); + DCHECK(string != nullptr) << "JIT roots require strings to have been loaded"; + roots->Set(index, string); + entry.second = index; + ++index; + } + EmitJitRootPatches(code, roots_data); +} + } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a81f24e3d8..212d5711f7 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -31,6 +31,7 @@ #include "nodes.h" #include "optimizing_compiler_stats.h" #include "stack_map_stream.h" +#include "string_reference.h" #include "utils/label.h" namespace art { @@ -331,6 +332,17 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item); size_t ComputeStackMapsSize(); + size_t GetNumberOfJitRoots() const { + return jit_string_roots_.size(); + } + + // Fills the `literals` array with literals collected during code generation. + // Also emits literal patches. 
+ void EmitJitRoots(uint8_t* code, + Handle<mirror::ObjectArray<mirror::Object>> roots, + const uint8_t* roots_data, + Handle<mirror::DexCache> outer_dex_cache) + REQUIRES_SHARED(Locks::mutator_lock_); bool IsLeafMethod() const { return is_leaf_; @@ -567,6 +579,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { fpu_callee_save_mask_(fpu_callee_save_mask), stack_map_stream_(graph->GetArena()), block_order_(nullptr), + jit_string_roots_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), disasm_info_(nullptr), stats_(stats), graph_(graph), @@ -633,6 +647,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return current_slow_path_; } + // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code. + virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, + const uint8_t* roots_data ATTRIBUTE_UNUSED) { + DCHECK_EQ(jit_string_roots_.size(), 0u); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -658,6 +678,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // The order to use for code generation. const ArenaVector<HBasicBlock*>* block_order_; + // Maps a StringReference (dex_file, string_index) to the index in the literal table. + // Entries are intially added with a 0 index, and `EmitJitRoots` will compute all the + // indices. + ArenaSafeMap<StringReference, size_t, StringReferenceValueComparator> jit_string_roots_; + DisassemblyInformation* disasm_info_; private: diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index be65f89ef1..f9ef96ceb5 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -489,8 +489,14 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) - : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -504,26 +510,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
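// Why the parallel move resolver is needed here (sketch): arg0 and arg1 may
// already sit in the calling-convention registers, just swapped. Emitting the
// two moves sequentially, e.g.
//   mov r0, r1   // arg0 -> first argument register
//   mov r1, r0   // arg1 -> second argument register, but arg1 was just clobbered
// would lose one value; EmitParallelMoves detects the cycle and breaks it with
// a temporary (or an actual swap) before the runtime call.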
InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves( - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, - object_class, - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); - + codegen->EmitParallelMoves(arg0, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + arg1, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes< - kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); - arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + arm_codegen->InvokeRuntime(kQuickCheckInstanceOf, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } if (!is_fatal_) { @@ -638,6 +644,11 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -894,6 +905,11 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -4841,8 +4857,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (type) { case Primitive::kPrimBoolean: @@ -4915,6 +4929,11 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimNot: { + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. 
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -5055,8 +5074,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Location value_loc = locations->InAt(2); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (value_type) { case Primitive::kPrimBoolean: @@ -5306,8 +5323,6 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -5322,9 +5337,6 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* Location first = locations->InAt(0); Location second = locations->InAt(1); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); - if (second.IsRegister()) { __ add(out.AsRegister<Register>(), first.AsRegister<Register>(), @@ -5877,6 +5889,9 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + return HLoadString::LoadKind::kDexCacheViaMethod; case HLoadString::LoadKind::kDexCacheViaMethod: break; } @@ -6291,26 +6306,16 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - Label loop, compare_classes; + Label loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); - // If the class reference currently in `temp` is not null, jump - // to the `compare_classes` label to compare it with the checked - // class. - __ CompareAndBranchIfNonZero(temp, &compare_classes); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ b(type_check_slow_path->GetEntryLabel()); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. + __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel()); - __ Bind(&compare_classes); + // Otherwise, compare the classes. 
__ cmp(temp, ShifterOperand(cls)); __ b(&loop, NE); break; @@ -6326,55 +6331,29 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); - // If the class reference currently in `temp` is not null, jump - // back at the beginning of the loop. - __ CompareAndBranchIfNonZero(temp, &loop); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ b(type_check_slow_path->GetEntryLabel()); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. + __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel()); + // Otherwise, jump to the beginning of the loop. + __ b(&loop); break; } case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. - Label check_non_primitive_component_type; __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); - - // If the component type is not null (i.e. the object is indeed - // an array), jump to label `check_non_primitive_component_type` - // to further check that this component type is not a primitive - // type. - __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ b(type_check_slow_path->GetEntryLabel()); - - __ Bind(&check_non_primitive_component_type); + // If the component type is null, jump to the slow path to throw the exception. + __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel()); + // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive type. __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); - __ CompareAndBranchIfZero(temp, &done); - // Same comment as above regarding `temp` and the slow path. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ b(type_check_slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel()); break; } @@ -6390,13 +6369,6 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. 
- // - // TODO: Introduce a new runtime entry point taking the object - // to test (instead of its class) as argument, and let it deal - // with the read barrier issues. This will let us refactor this - // case of the `switch` code as it was previously (with a direct - // call to the runtime not using a type checking slow path). - // This should also be beneficial for the other cases above. __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -6843,7 +6815,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -6919,14 +6891,13 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i } AddSlowPath(slow_path); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1); __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS. __ Bind(slow_path->GetExitLabel()); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b53750966d..4f7cc618e6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -459,9 +459,15 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location class_to_check = locations->InAt(1); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) - : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } + DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -476,21 +482,22 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves( - class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, - object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); - + codegen->EmitParallelMoves(arg0, + LocationFrom(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + arg1, + LocationFrom(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, - const mirror::Class*, const mirror::Class*>(); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } if (!is_fatal_) { @@ -626,6 +633,11 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -876,7 +888,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. DCHECK(!(instruction_->IsArrayGet() && instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); @@ -2192,8 +2206,6 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( } void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); @@ -2201,10 +2213,7 @@ void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instr locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64::VisitIntermediateAddress( - HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
- DCHECK(!kEmitCompilerReadBarrier); +void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { __ Add(OutputRegister(instruction), InputRegisterAt(instruction, 0), Operand(InputOperandAt(instruction, 1))); @@ -2304,11 +2313,15 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); + // The read barrier instrumentation of object ArrayGet instructions + // does not support the HIntermediateAddress instruction. + DCHECK(!((type == Primitive::kPrimNot) && + instruction->GetArray()->IsIntermediateAddress() && + kEmitCompilerReadBarrier)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. Register temp = temps.AcquireW(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -2341,9 +2354,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -2451,9 +2461,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (instruction->GetArray()->IsIntermediateAddress()) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -3594,26 +3601,15 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::aarch64::Label loop, compare_classes; + vixl::aarch64::Label loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); - // If the class reference currently in `temp` is not null, jump - // to the `compare_classes` label to compare it with the checked - // class. - __ Cbnz(temp, &compare_classes); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ B(type_check_slow_path->GetEntryLabel()); - - __ Bind(&compare_classes); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. 
+ __ Cbz(temp, type_check_slow_path->GetEntryLabel()); + // Otherwise, compare classes. __ Cmp(temp, cls); __ B(ne, &loop); break; @@ -3633,20 +3629,12 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // back at the beginning of the loop. __ Cbnz(temp, &loop); // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. - vixl::aarch64::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); @@ -3654,30 +3642,13 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); - // If the component type is not null (i.e. the object is indeed - // an array), jump to label `check_non_primitive_component_type` - // to further check that this component type is not a primitive - // type. - __ Cbnz(temp, &check_non_primitive_component_type); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ B(type_check_slow_path->GetEntryLabel()); - - __ Bind(&check_non_primitive_component_type); + // If the component type is null, jump to the slow path to throw the exception. + __ Cbz(temp, type_check_slow_path->GetEntryLabel()); + // Otherwise, the object is indeed an array. Further check that this component type is not a + // primitive type. __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbz(temp, &done); - // Same comment as above regarding `temp` and the slow path. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); - __ B(type_check_slow_path->GetEntryLabel()); + __ Cbnz(temp, type_check_slow_path->GetEntryLabel()); break; } @@ -3693,13 +3664,6 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. - // - // TODO: Introduce a new runtime entry point taking the object - // to test (instead of its class) as argument, and let it deal - // with the read barrier issues. This will let us refactor this - // case of the `switch` code as it was previously (with a direct - // call to the runtime not using a type checking slow path). - // This should also be beneficial for the other cases above. 
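// Note on the deletions above (derived from this change): the type-check slow
// path now receives the object itself (locations->InAt(0)) and the checked
// class (locations->InAt(1)), and kQuickCheckInstanceOf is typed as
//   void (mirror::Object*, mirror::Class*)   // per CheckEntrypointTypes<> above
// so `temp` no longer has to be restored to obj->klass_ before branching to
// the slow path; that is why the reloads and the old TODO are removed.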
__ B(type_check_slow_path->GetEntryLabel()); break; } @@ -4422,6 +4386,9 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( break; case HLoadString::LoadKind::kDexCacheViaMethod: break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + return HLoadString::LoadKind::kDexCacheViaMethod; } return desired_string_load_kind; } @@ -5426,7 +5393,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -5517,12 +5484,11 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* } AddSlowPath(slow_path); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index cc40522731..b9814b63e9 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -23,6 +23,7 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics_arm_vixl.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -62,6 +63,7 @@ static bool ExpectedPairLayout(Location location) { return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); } +static constexpr int kCurrentMethodStackOffset = 0; static constexpr size_t kArmInstrMaxSizeInBytes = 4u; #ifdef __ @@ -434,6 +436,67 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; +class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal) + : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } + DCHECK(instruction_->IsCheckCast() + || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + + if (!is_fatal_) { + TODO_VIXL32(FATAL); + } + + // We're moving two locations to locations 
that could overlap, so we need a parallel + // move resolver. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + + codegen->EmitParallelMoves(arg0, + LocationFrom(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + arg1, + LocationFrom(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); + if (instruction_->IsInstanceOf()) { + TODO_VIXL32(FATAL); + } else { + DCHECK(instruction_->IsCheckCast()); + arm_codegen->InvokeRuntime(kQuickCheckInstanceOf, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); + } + + if (!is_fatal_) { + TODO_VIXL32(FATAL); + } + } + + const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; } + + bool IsFatal() const OVERRIDE { return is_fatal_; } + + private: + const bool is_fatal_; + + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL); +}; + class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction) @@ -567,6 +630,11 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { return mask; } +size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index); + return kArmWordSize; +} + #undef __ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, @@ -600,7 +668,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15); } -#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> +#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { GetAssembler()->FinalizeCode(); @@ -1415,9 +1483,26 @@ void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* i // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - // TODO(VIXL): TryDispatch + IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } + return; + } HandleInvoke(invoke); + + // TODO(VIXL): invoke->HasPcRelativeDexCache() +} + +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARMVIXL intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; } void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -1425,7 +1510,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD // art::PrepareForRegisterAllocation. 
DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - // TODO(VIXL): TryGenerateIntrinsicCode + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } LocationSummary* locations = invoke->GetLocations(); DCHECK(locations->HasTemps()); @@ -1441,13 +1528,18 @@ void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO(VIXL): TryDispatch + IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } HandleInvoke(invoke); } void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO(VIXL): TryGenerateIntrinsicCode + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); @@ -1507,6 +1599,8 @@ void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: + // TODO(VIXL): Consider introducing an InputVRegister() + // helper function (equivalent to InputRegister()). __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0)); break; @@ -2325,7 +2419,12 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { break; } case Primitive::kPrimLong: { - TODO_VIXL32(FATAL); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, LocationFrom( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + locations->SetOut(LocationFrom(r0, r1)); break; } case Primitive::kPrimFloat: @@ -2342,6 +2441,7 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { } void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { + Location lhs = div->GetLocations()->InAt(0); Location rhs = div->GetLocations()->InAt(1); switch (div->GetResultType()) { @@ -2357,7 +2457,16 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { } case Primitive::kPrimLong: { - TODO_VIXL32(FATAL); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs))); + DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs))); + DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs))); + DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs))); + DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0)); + DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1)); + + codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc()); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -3663,7 +3772,7 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { // Also need for String compression feature. 
if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { - TODO_VIXL32(FATAL); + locations->AddTemp(Location::RequiresRegister()); } } @@ -3692,7 +3801,24 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { - TODO_VIXL32(FATAL); + vixl32::Register length = temps.Acquire(); + vixl32::Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Cmp(length, 0); + __ B(ge, &uncompressed_load); + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, + RegisterFrom(out_loc), + obj, + data_offset + const_index); + __ B(&done); + __ Bind(&uncompressed_load); + GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), + RegisterFrom(out_loc), + obj, + data_offset + (const_index << 1)); + __ Bind(&done); } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -3708,7 +3834,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Add(temp, obj, data_offset); } if (maybe_compressed_char_at) { - TODO_VIXL32(FATAL); + vixl32::Label uncompressed_load, done; + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + vixl32::Register length = RegisterFrom(locations->GetTemp(0)); + GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Cmp(length, 0); + __ B(ge, &uncompressed_load); + __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); + __ B(&done); + __ Bind(&uncompressed_load); + __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1)); + __ Bind(&done); } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); } @@ -4080,7 +4217,10 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction vixl32::Register out = OutputRegister(instruction); GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); codegen_->MaybeRecordImplicitNullCheck(instruction); - // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/ + // Mask out compression flag from String's array length. 
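// Scalar view of the compressed-string handling above and the length mask
// below (sketch; the bit-31 flag layout is taken from the code in this change,
// and str, data and index are placeholder names):
//   int32_t count = str->count_;                    // length word, flag in bit 31
//   uint32_t length = count & 0x7fffffffu;          // what the Bic below computes
//   uint16_t c = (count >= 0)
//       ? reinterpret_cast<const uint16_t*>(data)[index]   // uncompressed, Ldrh
//       : reinterpret_cast<const uint8_t*>(data)[index];   // compressed, Ldrb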
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ Bic(out, out, 1u << 31); + } } void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -4376,7 +4516,12 @@ void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) { GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem); GetAssembler()->StoreDToOffset(temp, sp, mem); } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { - TODO_VIXL32(FATAL); + vixl32::DRegister first = DRegisterFrom(source); + vixl32::DRegister second = DRegisterFrom(destination); + vixl32::DRegister temp = temps.AcquireD(); + __ Vmov(temp, first); + __ Vmov(first, second); + __ Vmov(second, temp); } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { TODO_VIXL32(FATAL); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { @@ -4609,6 +4754,115 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + +void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); + + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kAbstractClassCheck: + case TypeCheckKind::kClassHierarchyCheck: + case TypeCheckKind::kArrayObjectCheck: + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. + break; + case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: + call_kind = LocationSummary::kCallOnSlowPath; + break; + } + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (TypeCheckNeedsATemporary(type_check_kind)) { + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + vixl32::Register obj = InputRegisterAt(instruction, 0); + vixl32::Register cls = InputRegisterAt(instruction, 1); + Location temp_loc = locations->GetTemp(0); + vixl32::Register temp = RegisterFrom(temp_loc); + Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? 
+ locations->GetTemp(1) : + Location::NoLocation(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARMVIXL* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); + + vixl32::Label done; + // Avoid null check if we know obj is not null. + if (instruction->MustDoNullCheck()) { + __ Cbz(obj, &done); + } + + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kArrayCheck: { + __ Cmp(temp, cls); + // Jump to slow path for throwing the exception or doing a + // more involved array check. + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + TODO_VIXL32(FATAL); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + TODO_VIXL32(FATAL); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + TODO_VIXL32(FATAL); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: + TODO_VIXL32(FATAL); + break; + } + __ Bind(&done); + + __ Bind(type_check_slow_path->GetExitLabel()); +} + void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } @@ -4780,6 +5034,24 @@ void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* i } } +void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp ATTRIBUTE_UNUSED) { + vixl32::Register out_reg = RegisterFrom(out); + vixl32::Register obj_reg = RegisterFrom(obj); + if (kEmitCompilerReadBarrier) { + TODO_VIXL32(FATAL); + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction ATTRIBUTE_UNUSED, Location root, @@ -4798,6 +5070,39 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + vixl::aarch32::Register obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location temp ATTRIBUTE_UNUSED, + bool needs_null_check ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + vixl::aarch32::Register obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED, + ScaleFactor scale_factor ATTRIBUTE_UNUSED, + Location temp ATTRIBUTE_UNUSED, + bool needs_null_check ATTRIBUTE_UNUSED, + bool always_update_field ATTRIBUTE_UNUSED, + vixl::aarch32::Register* temp2 ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, + Location out ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + Location obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, Location out, Location ref ATTRIBUTE_UNUSED, @@ -4871,7 +5176,10 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( if (current_method.IsRegister()) { method_reg = RegisterFrom(current_method); } else { - TODO_VIXL32(FATAL); + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = temp_reg; + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset); } // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; GetAssembler()->LoadFromOffset( @@ -4942,9 +5250,31 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location } // Copy the result of a call into the given target. -void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - Primitive::Type type ATTRIBUTE_UNUSED) { - TODO_VIXL32(FATAL); +void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) { + if (!trg.IsValid()) { + DCHECK_EQ(type, Primitive::kPrimVoid); + return; + } + + DCHECK_NE(type, Primitive::kPrimVoid); + + Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type); + if (return_loc.Equals(trg)) { + return; + } + + // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged + // with the last branch. + if (type == Primitive::kPrimLong) { + TODO_VIXL32(FATAL); + } else if (type == Primitive::kPrimDouble) { + TODO_VIXL32(FATAL); + } else { + // Let the parallel move resolver take care of all of this. 
+ HParallelMove parallel_move(GetGraph()->GetArena()); + parallel_move.AddMove(return_loc, trg, type, nullptr); + GetMoveResolver()->EmitNativeCode(¶llel_move); + } } #undef __ diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index df7d46782d..c583a44924 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -113,6 +113,7 @@ class LoadClassSlowPathARMVIXL; M(BelowOrEqual) \ M(BooleanNot) \ M(BoundsCheck) \ + M(CheckCast) \ M(ClearException) \ M(ClinitCheck) \ M(Compare) \ @@ -171,7 +172,6 @@ class LoadClassSlowPathARMVIXL; // TODO: Remove once the VIXL32 backend is implemented completely. #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ M(BoundType) \ - M(CheckCast) \ M(ClassTableGet) \ M(InstanceOf) \ M(InvokeInterface) \ @@ -344,6 +344,22 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp); + // Generate a GC root reference load: // // root <- *(obj + offset) @@ -473,11 +489,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { return 0; } - size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister"; - return 0; - } + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; @@ -513,6 +525,62 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register value, bool can_be_null); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires an extra temporary register, which must be provided as a + // non-null pointer (`temp2`). + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field = false, + vixl::aarch32::Register* temp2 = nullptr); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. 
+ // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + // If read barriers are enabled, generate a read barrier for a heap // reference using a slow path. If heap poisoning is enabled, also // unpoison the reference in `out`. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 12b1ab9abb..0960c54408 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -378,7 +378,14 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -390,24 +397,22 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(1), + codegen->EmitParallelMoves(arg0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, - object_class, + arg1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); - if (instruction_->IsInstanceOf()) { mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); - CheckEntrypointTypes< - kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + mips_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } RestoreLiveRegisters(codegen, locations); @@ -5204,6 +5209,11 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheViaMethod: fallback_load = false; break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO: implement. 
+ fallback_load = true; + break; } if (fallback_load) { desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 010bf24232..7598740d3c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -322,7 +322,15 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } + uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -334,24 +342,23 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves(locations->InAt(1), + codegen->EmitParallelMoves(arg0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, - object_class, + arg1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); - if (instruction_->IsInstanceOf()) { mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes< - kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); + kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); - mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + mips64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } RestoreLiveRegisters(codegen, locations); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index efd33c7025..a2596379d7 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -312,8 +312,14 @@ class TypeCheckSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) - : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -327,25 +333,25 @@ class TypeCheckSlowPathX86 : public SlowPathCode { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
InvokeRuntimeCallingConvention calling_convention; - x86_codegen->EmitParallelMoves( - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, - object_class, - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); - + x86_codegen->EmitParallelMoves(arg0, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + arg1, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes< - kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + x86_codegen->InvokeRuntime(kQuickCheckInstanceOf, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } if (!is_fatal_) { @@ -6217,6 +6223,9 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( break; case HLoadString::LoadKind::kDexCacheViaMethod: break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + return HLoadString::LoadKind::kDexCacheViaMethod; } return desired_string_load_kind; } @@ -6645,26 +6654,17 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop, compare_classes; + NearLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); - // If the class reference currently in `temp` is not null, jump - // to the `compare_classes` label to compare it with the checked - // class. + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. __ testl(temp, temp); - __ j(kNotEqual, &compare_classes); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); + __ j(kZero, type_check_slow_path->GetEntryLabel()); - __ Bind(&compare_classes); + // Otherwise, compare the classes if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -6693,21 +6693,14 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. __ testl(temp, temp); - __ j(kNotEqual, &loop); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. 
- // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); + __ j(kNotZero, &loop); + // Otherwise, jump to the slow path to throw the exception.; __ jmp(type_check_slow_path->GetEntryLabel()); break; } case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. - NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -6720,28 +6713,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); - // If the component type is not null (i.e. the object is indeed - // an array), jump to label `check_non_primitive_component_type` - // to further check that this component type is not a primitive - // type. + // If the component type is null (i.e. the object not an array), jump to the slow path to + // throw the exception. Otherwise proceed with the check. __ testl(temp, temp); - __ j(kNotEqual, &check_non_primitive_component_type); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); + __ j(kZero, type_check_slow_path->GetEntryLabel()); - __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kEqual, &done); - // Same comment as above regarding `temp` and the slow path. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } @@ -7093,7 +7071,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -7111,14 +7089,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index fcabeeae5d..a1d22f8c93 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -332,8 +332,14 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) - : locations->Out(); + Location arg0, arg1; + if (instruction_->IsInstanceOf()) { + arg0 = locations->InAt(1); + arg1 = locations->Out(); + } else { + arg0 = locations->InAt(0); + arg1 = locations->InAt(1); + } uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -348,22 +354,19 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves( - locations->InAt(1), - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, - object_class, - Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); - + codegen->EmitParallelMoves(arg0, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + arg1, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); - CheckEntrypointTypes< - kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>(); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Class*, mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this); - CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); + x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); } if (!is_fatal_) { @@ -1263,7 +1266,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -5632,6 +5636,9 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( break; case HLoadString::LoadKind::kDexCacheViaMethod: break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; } return desired_string_load_kind; } @@ -5661,6 +5668,14 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { } } +Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index) { + jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u); + // Add a patch entry and return the label. 
+ jit_string_patches_.emplace_back(dex_file, dex_index); + PatchInfo<Label>* info = &jit_string_patches_.back(); + return &info->label; +} + void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); @@ -5692,6 +5707,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitTableAddress: { + Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, + /* no_rip */ true); + Label* fixup_label = + codegen_->NewJitRootStringPatch(load->GetDexFile(), load->GetStringIndex()); + // /* GcRoot<mirror::String> */ out = *address + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); + return; + } default: break; } @@ -5740,7 +5764,19 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { +static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + // We need a temporary for holding the iftable length. + return true; + } + return kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + +static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5778,7 +5814,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for // some cases. - if (TypeCheckNeedsATemporary(type_check_kind)) { + if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } @@ -5791,7 +5827,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? 
locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -5809,7 +5845,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -5970,33 +6010,45 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } } -void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); +bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; + return !throws_into_catch && !kEmitCompilerReadBarrier; + case TypeCheckKind::kInterfaceCheck: + return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; + return false; } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { + bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch); + LocationSummary::CallKind call_kind = is_fatal_slow_path + ? LocationSummary::kNoCall + : LocationSummary::kCallOnSlowPath; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + // Require a register for the interface check since there is a loop that compares the class to + // a memory address. + locations->SetInAt(1, Location::RequiresRegister()); + } else { + locations->SetInAt(1, Location::Any()); + } + // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (TypeCheckNeedsATemporary(type_check_kind)) { + if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } @@ -6009,20 +6061,19 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? 
locations->GetTemp(1) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const int object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); bool is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); + IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock()); SlowPathCode* type_check_slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, is_type_check_slow_path_fatal); @@ -6039,8 +6090,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -6063,30 +6117,23 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop, compare_classes; + NearLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); - // If the class reference currently in `temp` is not null, jump - // to the `compare_classes` label to compare it with the checked - // class. + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. __ testl(temp, temp); - __ j(kNotEqual, &compare_classes); - // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); - - __ Bind(&compare_classes); + // Otherwise, compare the classes. 
+ __ j(kZero, type_check_slow_path->GetEntryLabel()); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -6107,8 +6154,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // Walk over the class hierarchy to find a match. NearLabel loop; __ Bind(&loop); @@ -6126,14 +6176,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. __ testl(temp, temp); - __ j(kNotEqual, &loop); + __ j(kNotZero, &loop); // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6152,8 +6196,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // Do an exact check. NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { @@ -6173,23 +6220,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // to further check that this component type is not a primitive // type. __ testl(temp, temp); - __ j(kNotEqual, &check_non_primitive_component_type); // Otherwise, jump to the slow path to throw the exception. - // - // But before, move back the object's class into `temp` before - // going into the slow path, as it has been overwritten in the - // meantime. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); - - __ Bind(&check_non_primitive_component_type); + __ j(kZero, type_check_slow_path->GetEntryLabel()); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kEqual, &done); - // Same comment as above regarding `temp` and the slow path. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - __ jmp(type_check_slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; } @@ -6197,17 +6231,15 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: NearLabel done; + // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); __ j(kEqual, &done); } - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - // We always go into the type check slow path for the unresolved - // and interface check cases. + // We always go into the type check slow path for the unresolved case. 
// // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6223,6 +6255,46 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // case of the `switch` code as it was previously (with a direct // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. + + // Fast path for the interface check. Since we compare with a memory location in the inner + // loop we would need to have cls poisoned. However unpoisoning cls would reset the + // conditional flags and cause the conditional jump to be incorrect. + if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + // Try to avoid read barriers to improve the fast path. We can not get false positives by + // doing this. + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + /*emit_read_barrier*/ false); + + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + /*emit_read_barrier*/ false); + NearLabel is_null; + // Null iftable means it is empty. + __ testl(temp_loc.AsRegister<CpuRegister>(), temp_loc.AsRegister<CpuRegister>()); + __ j(kZero, &is_null); + + // Loop through the iftable and check if any class matches. + __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), + Address(temp_loc.AsRegister<CpuRegister>(), array_length_offset)); + + NearLabel start_loop; + __ Bind(&start_loop); + __ cmpl(cls.AsRegister<CpuRegister>(), + Address(temp_loc.AsRegister<CpuRegister>(), object_array_data_offset)); + __ j(kEqual, &done); // Return if same class. + // Go to next interface. + __ addq(temp_loc.AsRegister<CpuRegister>(), Immediate(2 * kHeapReferenceSize)); + __ subq(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); + __ j(kNotZero, &start_loop); + __ Bind(&is_null); + } __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6397,10 +6469,11 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset) { + uint32_t offset, + bool emit_read_barrier) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); - if (kEmitCompilerReadBarrier) { + if (emit_read_barrier) { if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6535,7 +6608,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -6553,14 +6626,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
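The static_assert updates just below repeat for x86-64 what the x86 hunk above already did. To spell out why testing a single byte of the lock word suffices, here is a stand-alone C++ sketch that only re-derives the gray_byte_position / gray_bit_position / test_value arithmetic and checks it against the white (0) and gray (1) states; the shift value used here is an assumption for illustration, not the real art::LockWord constant.

#include <cassert>
#include <cstdint>

int main() {
  // Assumed shift, for illustration only; the real value comes from art::LockWord.
  constexpr uint32_t kReadBarrierStateShift = 28;
  constexpr uint32_t kBitsPerByte = 8;
  // Same derivation as in the code generator hunks above.
  constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
  // With WhiteState() == 0 and GrayState() == 1, the two states differ only in
  // the low bit of the rb_state, so checking that one bit in the right byte of
  // the monitor word is enough; that is what the generated testb does.
  auto is_gray = [&](uint32_t monitor) {
    const uint8_t byte = static_cast<uint8_t>(monitor >> (gray_byte_position * kBitsPerByte));
    return (byte & static_cast<uint8_t>(test_value)) != 0;
  };
  assert(!is_gray(0u << kReadBarrierStateShift));  // white object
  assert(is_gray(1u << kReadBarrierStateShift));   // gray object
  return 0;
}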
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); @@ -7029,6 +7101,20 @@ void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low, } } +void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + for (const PatchInfo<Label>& info : jit_string_patches_) { + const auto& it = jit_string_roots_.find(StringReference(&info.dex_file, info.index)); + DCHECK(it != jit_string_roots_.end()); + size_t index_in_table = it->second; + uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = + dchecked_integral_cast<uint32_t>(address); + } +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 8b19dad0d0..bc78b8cee6 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -248,7 +248,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset); + uint32_t offset, + bool emit_read_barrier); // Generate a GC root reference load: // // root <- *address @@ -410,11 +411,14 @@ class CodeGeneratorX86_64 : public CodeGenerator { void RecordTypePatch(HLoadClass* load_class); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + Label* NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } @@ -600,6 +604,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fixups for jump tables need to be handled specially. ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // Patches for string literals in JIT compiled code. 
+ ArenaDeque<PatchInfo<Label>> jit_string_patches_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 9ec32df578..ac83bd9b0c 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -259,7 +259,7 @@ static void ValidateGraph(HGraph* graph) { GraphChecker graph_checker(graph); graph_checker.Run(); if (!graph_checker.IsValid()) { - for (auto error : graph_checker.GetErrors()) { + for (const auto& error : graph_checker.GetErrors()) { std::cout << error << std::endl; } } @@ -269,7 +269,7 @@ static void ValidateGraph(HGraph* graph) { template <typename Expected> static void RunCodeNoCheck(CodeGenerator* codegen, HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, + const std::function<void(HGraph*)>& hook_before_codegen, bool has_result, Expected expected) { SsaLivenessAnalysis liveness(graph, codegen); diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 8c08a9c8b9..13824ad671 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -135,6 +135,16 @@ inline vixl::aarch32::Register InputRegister(HInstruction* instr) { return InputRegisterAt(instr, 0); } +inline int32_t Int32ConstantFrom(Location location) { + HConstant* instr = location.GetConstant(); + if (instr->IsIntConstant()) { + return instr->AsIntConstant()->GetValue(); + } else { + DCHECK(instr->IsNullConstant()) << instr->DebugName(); + return 0; + } +} + inline int64_t Int64ConstantFrom(Location location) { HConstant* instr = location.GetConstant(); if (instr->IsIntConstant()) { diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index e10b1d6b2e..05c6df4a93 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -39,8 +39,7 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName) - : HOptimization(graph, name) {} + HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index d1a2a2649a..5fac3acb8a 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -42,7 +42,7 @@ class ConstantFoldingTest : public CommonCompilerTest { const std::string& expected_before, const std::string& expected_after_cf, const std::string& expected_after_dce, - std::function<void(HGraph*)> check_after_cf, + const std::function<void(HGraph*)>& check_after_cf, Primitive::Type return_type = Primitive::kPrimInt) { graph_ = CreateCFG(&allocator_, data, return_type); TestCodeOnReadyGraph(expected_before, @@ -54,7 +54,7 @@ class ConstantFoldingTest : public CommonCompilerTest { void TestCodeOnReadyGraph(const std::string& expected_before, const std::string& expected_after_cf, const std::string& expected_after_dce, - std::function<void(HGraph*)> check_after_cf) { + const std::function<void(HGraph*)>& check_after_cf) { ASSERT_NE(graph_, nullptr); StringPrettyPrinter printer_before(graph_); @@ -65,7 +65,7 @@ class ConstantFoldingTest : public CommonCompilerTest { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions()); - 
HConstantFolding(graph_).Run(); + HConstantFolding(graph_, "constant_folding").Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); ASSERT_TRUE(graph_checker_cf.IsValid()); @@ -77,7 +77,7 @@ class ConstantFoldingTest : public CommonCompilerTest { check_after_cf(graph_); - HDeadCodeElimination(graph_).Run(); + HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run(); GraphChecker graph_checker_dce(graph_); graph_checker_dce.Run(); ASSERT_TRUE(graph_checker_dce.IsValid()); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 9de521ad8d..c31c66a056 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -161,8 +161,21 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi // | | | // B4 B5 B? // -// This simplification cannot be applied for loop headers, as they -// contain a suspend check. +// Note that individual edges can be redirected (for example B2->B3 +// can be redirected as B2->B5) without applying this optimization +// to other incoming edges. +// +// This simplification cannot be applied to catch blocks, because +// exception handler edges do not represent normal control flow. +// Though in theory this could still apply to normal control flow +// going directly to a catch block, we cannot support it at the +// moment because the catch Phi's inputs do not correspond to the +// catch block's predecessors, so we cannot identify which +// predecessor corresponds to a given statically evaluated input. +// +// We do not apply this optimization to loop headers as this could +// create irreducible loops. We rely on the suspend check in the +// loop header to prevent the pattern match. // // Note that we rely on the dead code elimination to get rid of B3. 
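To make the B0..B5 comment above concrete, here is a minimal source-level sketch (C++ purely for illustration; the real input is dex code and this snippet is not part of the patch) whose control-flow graph has exactly the shape SimplifyIfs targets: B3 contains only the Phi and the If, so a predecessor whose Phi input is a known constant can have its edge redirected straight to B4 or B5.

// Illustrative only; not from the patch.
int SimplifyIfsExample(int x) {
  bool flag;        // becomes the single Phi in B3
  if (x > 0) {      // B0 ends with the first If
    flag = true;    // B1: contributes a constant Phi input
  } else {
    flag = false;   // B2: contributes a constant Phi input
  }
  if (flag) {       // B3: only the Phi and the If
    return 1;       // B4
  } else {
    return 2;       // B5
  }
}

After the simplification, B1's edge goes directly to B4 and B2's edge to B5; B3 itself becomes dead and, as the comment above notes, is left for dead code elimination to remove.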
bool HDeadCodeElimination::SimplifyIfs() { @@ -172,7 +185,8 @@ bool HDeadCodeElimination::SimplifyIfs() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { HInstruction* last = block->GetLastInstruction(); HInstruction* first = block->GetFirstInstruction(); - if (last->IsIf() && + if (!block->IsCatchBlock() && + last->IsIf() && block->HasSinglePhi() && block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) { bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi()); diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 58e700deba..84fd890eee 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -29,9 +29,7 @@ namespace art { */ class HDeadCodeElimination : public HOptimization { public: - HDeadCodeElimination(HGraph* graph, - OptimizingCompilerStats* stats = nullptr, - const char* name = kDeadCodeEliminationPassName) + HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index fe52aacef7..fdd77e7261 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -44,7 +44,7 @@ static void TestCode(const uint16_t* data, std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); - HDeadCodeElimination(graph).Run(); + HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run(); GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 7cc8b1ea4c..235793d8d2 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -58,22 +58,90 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) { } /** - * An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as length + b - * because length >= 0 is true. This makes it more likely the bound is useful to clients. + * Detects an instruction that is >= 0. As long as the value is carried by + * a single instruction, arithmetic wrap-around cannot occur. */ -static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { - int64_t value; - if (v.is_known && - v.a_constant >= 1 && - v.instruction->IsDiv() && - v.instruction->InputAt(0)->IsArrayLength() && - IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { - return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant); +static bool IsGEZero(HInstruction* instruction) { + DCHECK(instruction != nullptr); + if (instruction->IsArrayLength()) { + return true; + } else if (instruction->IsInvokeStaticOrDirect()) { + switch (instruction->AsInvoke()->GetIntrinsic()) { + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + case Intrinsics::kMathAbsInt: + case Intrinsics::kMathAbsLong: + // Instruction ABS(x) is >= 0. 
+ return true; + default: + break; + } + } + int64_t value = -1; + return IsIntAndGet(instruction, &value) && value >= 0; +} + +/** Hunts "under the hood" for a suitable instruction at the hint. */ +static bool IsMaxAtHint( + HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { + if (instruction->IsInvokeStaticOrDirect()) { + switch (instruction->AsInvoke()->GetIntrinsic()) { + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + // For MIN(x, y), return most suitable x or y as maximum. + return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); + default: + break; + } + } else { + *suitable = instruction; + while (instruction->IsArrayLength() || + instruction->IsNullCheck() || + instruction->IsNewArray()) { + instruction = instruction->InputAt(0); + } + return instruction == hint; + } + return false; +} + +/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ +static InductionVarRange::Value SimplifyMin(InductionVarRange::Value v) { + if (v.is_known && v.a_constant == 1 && v.b_constant <= 0) { + // If a == 1, instruction >= 0 and b <= 0, just return the constant b. + // No arithmetic wrap-around can occur. + if (IsGEZero(v.instruction)) { + return InductionVarRange::Value(v.b_constant); + } } return v; } -/** Helper method to test for a constant value. */ +/** Post-analysis simplification of a maximum value that makes the bound more useful to clients. */ +static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruction* hint) { + if (v.is_known && v.a_constant >= 1) { + // An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as + // length + b because length >= 0 is true. + int64_t value; + if (v.instruction->IsDiv() && + v.instruction->InputAt(0)->IsArrayLength() && + IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { + return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant); + } + // If a == 1, the most suitable one suffices as maximum value. + HInstruction* suitable = nullptr; + if (v.a_constant == 1 && IsMaxAtHint(v.instruction, hint, &suitable)) { + return InductionVarRange::Value(suitable, 1, v.b_constant); + } + } + return v; +} + +/** Tests for a constant value. */ static bool IsConstantValue(InductionVarRange::Value v) { return v.is_known && v.a_constant == 0; } @@ -97,7 +165,7 @@ static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primi } } -/** Helper method to insert an instruction. */ +/** Inserts an instruction. */ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId(); @@ -106,7 +174,7 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { return instruction; } -/** Helper method to obtain loop's control instruction. */ +/** Obtains loop's control instruction. 
*/ static HInstruction* GetLoopControl(HLoopInformation* loop) { DCHECK(loop != nullptr); return loop->GetHeader()->GetLastInstruction(); @@ -150,9 +218,14 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, chase_hint_ = chase_hint; bool in_body = context->GetBlock() != loop->GetHeader(); int64_t stride_value = 0; - *min_val = GetVal(info, trip, in_body, /* is_min */ true); - *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint); *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip); + chase_hint_ = nullptr; + // Retry chasing constants for wrap-around (merge sensitive). + if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) { + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); + } return true; } @@ -175,7 +248,7 @@ bool InductionVarRange::CanGenerateRange(HInstruction* context, needs_taken_test) && (stride_value == -1 || stride_value == 0 || - stride_value == 1); // avoid wrap-around anomalies. + stride_value == 1); // avoid arithmetic wrap-around anomalies. } void InductionVarRange::GenerateRange(HInstruction* context, @@ -302,7 +375,8 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, return true; } } - // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies. + // Try range analysis on the invariant, only accept a proper range + // to avoid arithmetic wrap-around anomalies. Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true); Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false); if (IsConstantValue(min_val) && @@ -450,25 +524,26 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { - // Stop chasing the instruction at constant or hint. - int64_t value; - if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { - return Value(static_cast<int32_t>(value)); - } else if (instruction == chase_hint_) { - return Value(instruction, 1, 0); - } - // Special cases when encountering a single instruction that denotes trip count in the - // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int - if (in_body && trip != nullptr && instruction == trip->op_a->fetch) { + // Special case when chasing constants: single instruction that denotes trip count in the + // loop-body is minimal 1 and maximal, with safe trip-count, max int, + if (chase_hint_ == nullptr && in_body && trip != nullptr && instruction == trip->op_a->fetch) { if (is_min) { return Value(1); - } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) { + } else if (!IsUnsafeTripCount(trip)) { return Value(std::numeric_limits<int32_t>::max()); } } - // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely - // range analysis will compare the same instructions as terminal nodes. - if (instruction->IsAdd()) { + // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that + // it becomes more likely range analysis will compare the same instructions as terminal nodes. + int64_t value; + if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + // Proper constant reveals best information. 
+ return Value(static_cast<int32_t>(value)); + } else if (instruction == chase_hint_) { + // At hint, fetch is represented by itself. + return Value(instruction, 1, 0); + } else if (instruction->IsAdd()) { + // Incorporate suitable constants in the chased value. if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { return AddValue(Value(static_cast<int32_t>(value)), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); @@ -477,14 +552,14 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, Value(static_cast<int32_t>(value))); } } else if (instruction->IsArrayLength()) { - // Return extreme values when chasing constants. Otherwise, chase deeper. + // Exploit length properties when chasing constants or chase into a new array declaration. if (chase_hint_ == nullptr) { return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max()); } else if (instruction->InputAt(0)->IsNewArray()) { return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); } } else if (instruction->IsTypeConversion()) { - // Since analysis is 32-bit (or narrower) we allow a widening along the path. + // Since analysis is 32-bit (or narrower), chase beyond widening along the path. if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt && instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) { return GetFetch(instruction->InputAt(0), trip, in_body, is_min); @@ -506,6 +581,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, !IsUnsafeTripCount(next_trip)) { return GetVal(next_info, next_trip, next_in_body, is_min); } + // Fetch is represented by itself. return Value(instruction, 1, 0); } @@ -870,10 +946,11 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInstruction* opb = nullptr; switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: - // Invariants. + // Invariants (note that even though is_min does not impact code generation for + // invariants, some effort is made to keep this parameter consistent). switch (info->operation) { case HInductionVarAnalysis::kAdd: - case HInductionVarAnalysis::kXor: + case HInductionVarAnalysis::kXor: // no proper is_min for second arg case HInductionVarAnalysis::kLT: case HInductionVarAnalysis::kLE: case HInductionVarAnalysis::kGT: diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index cc420b3260..9e816237dd 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1315,8 +1315,8 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. 
- HDeadCodeElimination dce(callee_graph, stats_); - HConstantFolding fold(callee_graph); + HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner"); + HConstantFolding fold(callee_graph, "constant_folding$inliner"); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); IntrinsicsRecognizer intrinsics(callee_graph, stats_); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index e4d280f26d..e06fdee370 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -111,9 +111,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; - // We ensure we do not loop infinitely. The value is a finger in the air guess - // that should allow enough simplification. - static constexpr int kMaxSamePositionSimplifications = 10; + // We ensure we do not loop infinitely. The value should not be too high, since that + // would allow looping around the same basic block too many times. The value should + // not be too low either, however, since we want to allow revisiting a basic block + // with many statements and simplifications at least once. + static constexpr int kMaxSamePositionSimplifications = 50; }; void InstructionSimplifier::Run() { @@ -605,11 +607,23 @@ static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstructi return nullptr; } +static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) { + if (input->GetType() == Primitive::kPrimBoolean) { + return true; // input has direct boolean type + } else if (cmp->GetUses().HasExactlyOneElement()) { + // Comparison also has boolean type if both its input and the instruction + // itself feed into the same phi node. + HInstruction* user = cmp->GetUses().front().GetUser(); + return user->IsPhi() && user->HasInput(input) && user->HasInput(cmp); + } + return false; +} + void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { HInstruction* input_const = equal->GetConstantRight(); if (input_const != nullptr) { HInstruction* input_value = equal->GetLeastConstantLeft(); - if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { + if (CmpHasBoolType(input_value, equal) && input_const->IsIntConstant()) { HBasicBlock* block = equal->GetBlock(); // We are comparing the boolean to a constant which is of type int and can // be any constant. 
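The rewrites in the VisitEqual hunk (continued just below) and in VisitNotEqual boil down to simple Boolean identities: for a value known to be boolean, (b == false) and (b != true) are both !b, which is what InsertOppositeCondition materializes. A trivial stand-alone check of those identities, for illustration only:

#include <cassert>

int main() {
  const bool values[] = {false, true};
  for (bool b : values) {
    assert((b == false) == !b);  // the VisitEqual case with a false constant
    assert((b != true) == !b);   // the VisitNotEqual case with a true constant
  }
  return 0;
}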
@@ -619,6 +633,7 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsFalse()) { + // Replace (bool_value == false) with !bool_value equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); block->RemoveInstruction(equal); RecordSimplification(); @@ -640,11 +655,12 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { HInstruction* input_const = not_equal->GetConstantRight(); if (input_const != nullptr) { HInstruction* input_value = not_equal->GetLeastConstantLeft(); - if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) { + if (CmpHasBoolType(input_value, not_equal) && input_const->IsIntConstant()) { HBasicBlock* block = not_equal->GetBlock(); // We are comparing the boolean to a constant which is of type int and can // be any constant. if (input_const->AsIntConstant()->IsTrue()) { + // Replace (bool_value != true) with !bool_value not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); block->RemoveInstruction(not_equal); RecordSimplification(); diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 782110c40a..9b54511340 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -48,7 +48,7 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { class InstructionSimplifierArm : public HOptimization { public: InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} + : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index f71684efe9..d4cb1f14b7 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -82,9 +82,10 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { class InstructionSimplifierArm64 : public HOptimization { public: InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} - static constexpr const char* kInstructionSimplifierArm64PassName - = "instruction_simplifier_arm64"; + : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} + + static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; + void Run() OVERRIDE { InstructionSimplifierArm64Visitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 04e063c92e..c2b1374f62 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -231,15 +231,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, size_t data_offset) { - if (kEmitCompilerReadBarrier) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - // - // TODO: Handle this case properly in the ARM64 and ARM code generator and - // re-enable this optimization; otherwise, remove this TODO. 
- // b/26601270 - return false; - } if (index->IsConstant() || (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { // When the index is a constant all the addressing can be fitted in the @@ -251,14 +242,20 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. return false; } + if (kEmitCompilerReadBarrier && + access->IsArrayGet() && + access->GetType() == Primitive::kPrimNot) { + // For object arrays, the read barrier instrumentation requires + // the original array pointer. + return false; + } // Proceed to extract the base address computation. HGraph* graph = access->GetBlock()->GetGraph(); ArenaAllocator* arena = graph->GetArena(); HIntConstant* offset = graph->GetIntConstant(data_offset); - HIntermediateAddress* address = - new (arena) HIntermediateAddress(array, offset, kNoDexPc); + HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc); // TODO: Is it ok to not have this on the intermediate address? // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 8790c1e4f1..93a2340a32 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1945,7 +1945,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // do { @@ -1986,9 +1986,8 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); // Carry flag is the last bit shifted out by LSRS. __ b(read_barrier_slow_path->GetEntryLabel(), CS); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index db1c022868..47e6d9699d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2659,7 +2659,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. 
// do { @@ -2704,9 +2704,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); // Fast-path copy. diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc new file mode 100644 index 0000000000..6ff0ca4eab --- /dev/null +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -0,0 +1,2694 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_arm_vixl.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_arm_vixl.h" +#include "common_arm.h" +#include "lock_word.h" +#include "mirror/array-inl.h" + +#include "aarch32/constants-aarch32.h" + +namespace art { +namespace arm { + +#define __ assembler->GetVIXLAssembler()-> + +using helpers::DRegisterFrom; +using helpers::HighRegisterFrom; +using helpers::InputDRegisterAt; +using helpers::InputRegisterAt; +using helpers::InputSRegisterAt; +using helpers::InputVRegisterAt; +using helpers::Int32ConstantFrom; +using helpers::LocationFrom; +using helpers::LowRegisterFrom; +using helpers::LowSRegisterFrom; +using helpers::OutputDRegister; +using helpers::OutputRegister; +using helpers::OutputVRegister; +using helpers::RegisterFrom; +using helpers::SRegisterFrom; + +using namespace vixl::aarch32; // NOLINT(build/namespaces) + +ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() { + return codegen_->GetAssembler(); +} + +ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an +// intrinsified call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored! +// +// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially +// sub-optimal (compared to a direct pointer call), but this is a slow-path. 
+ +class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke) + : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {} + + Location MoveArguments(CodeGenerator* codegen) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor); + return calling_convention_visitor.GetMethodLocation(); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler()); + __ Bind(GetEntryLabel()); + + SaveLiveRegisters(codegen, invoke_->GetLocations()); + + Location method_loc = MoveArguments(codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + } else { + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + } + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + codegen->MoveFromReturnRegister(out, invoke_->GetType()); + } + + RestoreLiveRegisters(codegen, invoke_->GetLocations()); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL); +}; + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) + : SlowPathCodeARMVIXL(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + vixl32::Register dest = InputRegisterAt(instruction_, 2); + Location dest_pos = locations->InAt(3); + vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1)); + vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2)); + vixl32::Register tmp = RegisterFrom(locations->GetTemp(3)); + + __ Bind(GetEntryLabel()); + // Compute the base destination address in `dst_curr_addr`. 
+ if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(dst_curr_addr, dest, element_size * constant + offset); + } else { + __ Add(dst_curr_addr, + dest, + Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(dst_curr_addr, dst_curr_addr, offset); + } + + vixl32::Label loop; + __ Bind(&loop); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + assembler->MaybeUnpoisonHeapReference(tmp); + // TODO: Inline the mark bit check before calling the runtime? + // tmp = ReadBarrier::Mark(tmp); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more + // explanations.) + DCHECK(!tmp.IsSP()); + DCHECK(!tmp.IsLR()); + DCHECK(!tmp.IsPC()); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK(!src_curr_addr.Is(ip)); + DCHECK(!dst_curr_addr.Is(ip)); + DCHECK(!src_stop_addr.Is(ip)); + DCHECK(!tmp.Is(ip)); + DCHECK(tmp.IsRegister()) << tmp; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + assembler->MaybePoisonHeapReference(tmp); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(ne, &loop); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierSystemArrayCopySlowPathARMVIXL"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL); +}; + +IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen) + : arena_(codegen->GetGraph()->GetArena()), + assembler_(codegen->GetAssembler()), + features_(codegen->GetInstructionSetFeatures()) {} + +bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + return res->Intrinsified(); +} + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input)); + } else { + __ Vmov(RegisterFrom(output), SRegisterFrom(input)); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ Vmov(DRegisterFrom(output), 
LowRegisterFrom(input), HighRegisterFrom(input)); + } else { + __ Vmov(SRegisterFrom(output), RegisterFrom(input)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} +void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void GenNumberOfLeadingZeros(LocationSummary* locations, + Primitive::Type type, + ArmVIXLAssembler* assembler) { + Location in = locations->InAt(0); + vixl32::Register out = RegisterFrom(locations->Out()); + + DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + + if (type == Primitive::kPrimLong) { + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Label end; + __ Clz(out, in_reg_hi); + __ Cbnz(in_reg_hi, &end); + __ Clz(out, in_reg_lo); + __ Add(out, out, 32); + __ Bind(&end); + } else { + __ Clz(out, RegisterFrom(in)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void GenNumberOfTrailingZeros(LocationSummary* locations, + Primitive::Type type, + 
ArmVIXLAssembler* assembler) { + DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + + vixl32::Register out = RegisterFrom(locations->Out()); + + if (type == Primitive::kPrimLong) { + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + vixl32::Label end; + __ Rbit(out, in_reg_lo); + __ Clz(out, out); + __ Cbnz(in_reg_lo, &end); + __ Rbit(out, in_reg_hi); + __ Clz(out, out); + __ Add(out, out, 32); + __ Bind(&end); + } else { + vixl32::Register in = RegisterFrom(locations->InAt(0)); + __ Rbit(out, in); + __ Clz(out, out); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { + __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmVIXLAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + + if (is64bit) { + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, mask, out_reg_lo); + __ Eor(out_reg_hi, mask, out_reg_hi); + } else { + vixl32::Register in_reg = RegisterFrom(in); + vixl32::Register out_reg = 
RegisterFrom(output); + + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, mask, out_reg); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { + vixl32::Register op1 = InputRegisterAt(invoke, 0); + vixl32::Register op2 = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + __ Cmp(op1, op2); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke, /* is_min */ true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke, /* is_min */ false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
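A minimal C++ model (illustrative only, not part of the patch) of the branch-free absolute value emitted by GenAbsInteger above; the arithmetic is done in unsigned so the wraparound of the hardware ADD is well defined, and the `>> 31` is assumed to be an arithmetic shift, as it is on ARM:

#include <cstdint>

static int32_t AbsModel32(int32_t x) {
  uint32_t mask = static_cast<uint32_t>(x >> 31);           // Asr: 0 if x >= 0, 0xFFFFFFFF if x < 0.
  uint32_t abs = (static_cast<uint32_t>(x) + mask) ^ mask;  // Add then Eor.
  return static_cast<int32_t>(abs);                         // Note: INT32_MIN maps to itself, as in the emitted code.
}

The long variant follows the same identity, with the add carried across the lo/hi register pair by ADDS/ADC.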
+ vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out()); + vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out()); + if (addr.Is(lo)) { + __ Ldr(hi, MemOperand(addr, 4)); + __ Ldr(lo, addr); + } else { + __ Ldr(lo, addr); + __ Ldr(hi, MemOperand(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. 
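A rough C++ model (illustrative only, not part of the patch) of what VisitMemoryPeekLongNative above computes: two independent 32-bit loads instead of LDRD, since the address may be unaligned, combined little-endian. The emitted code additionally orders the loads so that when the low output register aliases the address register, the high word is read first and the address is not clobbered.

#include <cstdint>
#include <cstring>

static int64_t PeekLongModel(const uint8_t* addr) {
  uint32_t lo;
  uint32_t hi;
  std::memcpy(&lo, addr, sizeof(lo));      // Ldr lo, [addr]      (unaligned-tolerant)
  std::memcpy(&hi, addr + 4, sizeof(hi));  // Ldr hi, [addr, #4]
  return static_cast<int64_t>((static_cast<uint64_t>(hi) << 32) | lo);
}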
+ __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr); + __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Ldr(OutputRegister(invoke), + MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value())); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARMVIXL* codegen) { + LocationSummary* locations = invoke->GetLocations(); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + Location base_loc = locations->InAt(1); + vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. + Location offset_loc = locations->InAt(2); + vixl32::Register offset = LowRegisterFrom(offset_loc); // Long offset, lo part only. + Location trg_loc = locations->Out(); + + switch (type) { + case Primitive::kPrimInt: { + vixl32::Register trg = RegisterFrom(trg_loc); + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + break; + } + + case Primitive::kPrimNot: { + vixl32::Register trg = RegisterFrom(trg_loc); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + } else { + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + assembler->MaybeUnpoisonHeapReference(trg); + } + break; + } + + case Primitive::kPrimLong: { + vixl32::Register trg_lo = LowRegisterFrom(trg_loc); + vixl32::Register trg_hi = HighRegisterFrom(trg_loc); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset)); + } else { + __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset)); + } + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); + LocationSummary* locations = new (arena) LocationSummary(invoke, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
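A rough C++ analogue (illustrative only, not part of the patch) of the ordering GenUnsafeGet above enforces for the volatile int variant: a plain load followed by a full barrier (DMB ISH), modelled here with a sequentially consistent fence:

#include <atomic>
#include <cstdint>

static int32_t VolatileGetIntModel(const int32_t* addr) {
  int32_t value = *static_cast<const volatile int32_t*>(addr);  // Ldr
  std::atomic_thread_fence(std::memory_order_seq_cst);          // Dmb(ISH)
  return value;
}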
+ } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
+ } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + + vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer. + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 
+ vixl32::Register value; + + if (is_volatile || is_ordered) { + __ Dmb(vixl32::ISH); + } + + if (type == Primitive::kPrimLong) { + vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3)); + vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3)); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Add(temp_reg, base, offset); + vixl32::Label loop_head; + __ Bind(&loop_head); + __ Ldrexd(temp_lo, temp_hi, temp_reg); + __ Strexd(temp_lo, value_lo, value_hi, temp_reg); + __ Cmp(temp_lo, 0); + __ B(ne, &loop_head); + } else { + __ Strd(value_lo, value_hi, MemOperand(base, offset)); + } + } else { + value = RegisterFrom(locations->InAt(3)); + vixl32::Register source = value; + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + __ Mov(temp, value); + assembler->PoisonHeapReference(temp); + source = temp; + } + __ Str(source, MemOperand(base, offset)); + } + + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + + if (type == Primitive::kPrimNot) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + vixl32::Register card = RegisterFrom(locations->GetTemp(1)); + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(temp, card, base, value, value_can_be_null); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} + +static void 
CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); + LocationSummary* locations = new (arena) LocationSummary(invoke, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // If heap poisoning is enabled, we don't want the unpoisoning + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; + locations->SetOut(Location::RequiresRegister(), overlaps); + + // Temporary registers used in CAS. In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. +} + +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Location out_loc = locations->Out(); + vixl32::Register out = OutputRegister(invoke); // Boolean result. + + vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. + Location offset_loc = locations->InAt(2); + vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B). + vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected. + vixl32::Register value = InputRegisterAt(invoke, 4); // Value. + + Location tmp_ptr_loc = locations->GetTemp(0); + vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. + + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } + } + + // Prevent reordering with prior memory operations. 
+ // Emit a DMB ISH instruction instead of an DMB ISHST one, as the + // latter allows a preceding load to be delayed past the STXR + // instruction below. + __ Dmb(vixl32::ISH); + + __ Add(tmp_ptr, base, offset); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + vixl32::Label loop_head; + __ Bind(&loop_head); + + __ Ldrex(tmp, tmp_ptr); + + __ Subs(tmp, tmp, expected); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ itt(eq); + __ strex(eq, tmp, value, tmp_ptr); + __ cmp(eq, tmp, 1); + } + + __ B(eq, &loop_head); + + __ Dmb(vixl32::ISH); + + __ Rsbs(out, tmp, 1); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(cc); + __ mov(cc, out, 0); + } + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke, Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + GenCas(invoke, Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + invoke->InputAt(1)->CanBeNull() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. 
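For the LDREX/STREX loop in GenCas above, the same contract expressed with a standard C++ strong compare-and-swap (illustrative only, not part of the patch); on LL/SC targets a strong CAS lowers to a retry loop very much like the one emitted here:

#include <atomic>
#include <cstdint>

static bool CasIntModel(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  int32_t observed = expected;
  // Returns true iff the value in memory equalled `expected` and was replaced by
  // `new_value`; the retry on a failed exclusive store is hidden inside the call.
  return addr->compare_exchange_strong(observed, new_value, std::memory_order_seq_cst);
}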
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register str = InputRegisterAt(invoke, 0); + vixl32::Register arg = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + vixl32::Register temp3, temp4; + if (mirror::kUseStringCompression) { + temp3 = RegisterFrom(locations->GetTemp(3)); + temp4 = RegisterFrom(locations->GetTemp(4)); + } + + vixl32::Label loop; + vixl32::Label find_char_diff; + vixl32::Label end; + vixl32::Label different_compression; + + // Get offsets of count and value fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Take slow path and throw if input can be and is null. + SlowPathCodeARMVIXL* slow_path = nullptr; + const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); + if (can_slow_path) { + slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ Cbz(arg, slow_path->GetEntryLabel()); + } + + // Reference equality check, return 0 if same reference. + __ Subs(out, str, arg); + __ B(eq, &end); + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp_reg = temps.Acquire(); + + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ Ldr(temp3, MemOperand(str, count_offset)); + __ Ldr(temp4, MemOperand(arg, count_offset)); + // Clean out compression flag from lengths. + __ Bic(temp0, temp3, 0x80000000); + __ Bic(temp_reg, temp4, 0x80000000); + } else { + // Load lengths of this and argument strings. + __ Ldr(temp0, MemOperand(str, count_offset)); + __ Ldr(temp_reg, MemOperand(arg, count_offset)); + } + // out = length diff. + __ Subs(out, temp0, temp_reg); + // temp0 = min(len(str), len(arg)). + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(gt); + __ mov(gt, temp0, temp_reg); + } + + temps.Release(temp_reg); + // Shorter string is empty? + __ Cbz(temp0, &end); + + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ Eors(temp3, temp3, temp4); + __ B(mi, &different_compression); + } + // Store offset of string value in preparation for comparison loop. + __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. 
+ __ Cmp(temp4, 0); + + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(ge); + __ add(ge, temp0, temp0, temp0); + } + + // Assertions that must hold in order to compare multiple characters at a time. + CHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), + "String data must be 8-byte aligned for unrolled CompareTo loop."); + + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + vixl32::Label find_char_diff_2nd_cmp; + // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). + __ Bind(&loop); + temp_reg = temps.Acquire(); + __ Ldr(temp_reg, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(temp_reg, temp2); + __ B(ne, &find_char_diff); + __ Add(temp1, temp1, char_size * 2); + + __ Ldr(temp_reg, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(temp_reg, temp2); + __ B(ne, &find_char_diff_2nd_cmp); + __ Add(temp1, temp1, char_size * 2); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); + __ B(hi, &loop); + __ B(&end); + + __ Bind(&find_char_diff_2nd_cmp); + if (mirror::kUseStringCompression) { + __ Subs(temp0, temp0, 4); // 4 bytes previously compared. + __ B(ls, &end); // Was the second comparison fully beyond the end? + } else { + // Without string compression, we can start treating temp0 as signed + // and rely on the signed comparison below. + __ Sub(temp0, temp0, 2); + } + + // Find the single character difference. + __ Bind(&find_char_diff); + // Get the bit position of the first character that differs. + __ Eor(temp1, temp2, temp_reg); + __ Rbit(temp1, temp1); + __ Clz(temp1, temp1); + + // temp0 = number of characters remaining to compare. + // (Without string compression, it could be < 1 if a difference is found by the second CMP + // in the comparison loop, and after the end of the shorter string data). + + // Without string compression (temp1 >> 4) = character where difference occurs between the last + // two words compared, in the interval [0,1]. + // (0 for low half-word different, 1 for high half-word different). + // With string compression, (temp1 << 3) = byte where the difference occurs, + // in the interval [0,3]. + + // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside + // the remaining string data, so just return length diff (out). + // The comparison is unsigned for string compression, otherwise signed. + __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); + __ B((mirror::kUseStringCompression ? ls : le), &end); + // Extract the characters and calculate the difference. 
+ vixl32::Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ Cmp(temp4, 0); + __ B(ge, &uncompressed_string); + __ Bic(temp1, temp1, 0x7); + __ B(&continue_process); + } + __ Bind(&uncompressed_string); + __ Bic(temp1, temp1, 0xf); + __ Bind(&continue_process); + + __ Lsr(temp2, temp2, temp1); + __ Lsr(temp_reg, temp_reg, temp1); + vixl32::Label calculate_difference, uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ Cmp(temp4, 0); + __ B(ge, &uncompressed_string_extract_chars); + __ Ubfx(temp2, temp2, 0, 8); + __ Ubfx(temp_reg, temp_reg, 0, 8); + __ B(&calculate_difference); + } + __ Bind(&uncompressed_string_extract_chars); + __ Movt(temp2, 0); + __ Movt(temp_reg, 0); + __ Bind(&calculate_difference); + __ Sub(out, temp_reg, temp2); + temps.Release(temp_reg); + __ B(&end); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff; + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ Add(temp2, str, value_offset); + __ Add(temp3, arg, value_offset); + __ Cmp(temp4, 0); + __ B(lt, &loop_arg_compressed); + + __ Bind(&loop_this_compressed); + temp_reg = temps.Acquire(); + __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex)); + __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex)); + __ Cmp(temp_reg, temp4); + __ B(ne, &find_diff); + __ Subs(temp0, temp0, 1); + __ B(gt, &loop_this_compressed); + __ B(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex)); + __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex)); + __ Cmp(temp_reg, temp4); + __ B(ne, &find_diff); + __ Subs(temp0, temp0, 1); + __ B(gt, &loop_arg_compressed); + __ B(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ Sub(out, temp_reg, temp4); + temps.Release(temp_reg); + } + + __ Bind(&end); + + if (can_slow_path) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. + // Using instruction cbz requires a low register, so explicitly set a temp to be R0. 
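A standalone sketch (illustrative only, not part of the patch) of the uncompressed find_char_diff path above: XOR the mismatching words, locate the first differing bit with Rbit+Clz (a count of trailing zeros), round down to a 16-bit char boundary, then extract and subtract the two characters. __builtin_ctz is the GCC/Clang builtin used as a stand-in for the Rbit/Clz pair.

#include <cstdint>

// Precondition: str_word != arg_word (the loop only branches here on a mismatch).
static int32_t CharDiffInWord(uint32_t str_word, uint32_t arg_word) {
  uint32_t diff = str_word ^ arg_word;                         // Eor
  int bit = __builtin_ctz(diff);                               // Rbit + Clz
  bit &= ~0xf;                                                 // Bic ..., 0xf
  uint16_t str_char = static_cast<uint16_t>(str_word >> bit);  // Lsr + Movt #0
  uint16_t arg_char = static_cast<uint16_t>(arg_word >> bit);
  return static_cast<int32_t>(str_char) - static_cast<int32_t>(arg_char);  // Sub(out, ...)
}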
+ locations->AddTemp(LocationFrom(r0)); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register str = InputRegisterAt(invoke, 0); + vixl32::Register arg = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + + vixl32::Label loop, preloop; + vixl32::Label end; + vixl32::Label return_true; + vixl32::Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + StringEqualsOptimizations optimizations(invoke); + if (!optimizations.GetArgumentNotNull()) { + // Check if input is null, return false if it is. + __ Cbz(arg, &return_false); + } + + if (!optimizations.GetArgumentIsString()) { + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Ldr(temp, MemOperand(str, class_offset)); + __ Ldr(temp1, MemOperand(arg, class_offset)); + __ Cmp(temp, temp1); + __ B(ne, &return_false); + } + + // Load lengths of this and argument strings. + __ Ldr(temp, MemOperand(str, count_offset)); + __ Ldr(temp1, MemOperand(arg, count_offset)); + // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. + __ Cmp(temp, temp1); + __ B(ne, &return_false); + // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ Bic(temp, temp, 0x80000000); + } + __ Cbz(temp, &return_true); + // Reference equality check, return true if same reference. + __ Cmp(str, arg); + __ B(eq, &return_true); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); + + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ Cmp(temp1, 0); + __ B(gt, &preloop); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2. + __ Add(temp, temp, 1); + __ Lsr(temp, temp, 1); + __ Bind(&preloop); + } + // Loop to compare strings 2 characters at a time starting at the front of the string. + // Ok to do this because strings with an odd length are zero-padded. 
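Based only on the code above, illustrative helpers (not part of the patch) for how StringEquals interprets the String count field at this point in the tree: the sign bit carries the compression flag, Bic(..., 0x80000000) recovers the character count, and a compressed (8-bit) string is walked as (length + 1) / 2 16-bit units so the 4-byte comparison loop below can be reused:

#include <cstdint>

static bool IsCompressedCount(int32_t count) { return count < 0; }  // Sign bit set.

static uint32_t CharCount(int32_t count) {
  return static_cast<uint32_t>(count) & 0x7FFFFFFFu;                // Bic ..., 0x80000000
}

static uint32_t ComparisonUnits(int32_t count) {
  uint32_t length = CharCount(count);
  return IsCompressedCount(count) ? (length + 1) / 2 : length;      // Add #1; Lsr #1
}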
+ __ Mov(temp1, value_offset); + __ Bind(&loop); + __ Ldr(out, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(out, temp2); + __ B(ne, &return_false); + __ Add(temp1, temp1, sizeof(uint32_t)); + __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t)); + __ B(gt, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ Mov(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ Mov(out, 0); + __ Bind(&end); +} + +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. + SlowPathCodeARMVIXL* slow_path = nullptr; + HInstruction* code_point = invoke->InputAt(1); + if (code_point->IsIntConstant()) { + if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else if (code_point->GetType() != Primitive::kPrimChar) { + vixl32::Register char_reg = InputRegisterAt(invoke, 1); + // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`. + __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); + slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + __ B(hs, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0)); + DCHECK(tmp_reg.Is(r2)); + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(LocationFrom(r0)); + + // Need to send start-index=0. 
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::Register byte_array = InputRegisterAt(invoke, 0); + __ Cmp(byte_array, 0); + SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { + // No need to emit code checking whether `locations->InAt(2)` is a null + // pointer, as callers of the native method + // + // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) + // + // all include a null check on `data` before calling that method. 
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::Register string_to_copy = InputRegisterAt(invoke, 0); + __ Cmp(string_to_copy, 0); + SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); + + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); + LocationSummary* locations = invoke->GetLocations(); + if (locations == nullptr) { + return; + } + + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + + if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { + locations->SetInAt(1, Location::RequiresRegister()); + } + if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) { + locations->SetInAt(3, Location::RequiresRegister()); + } + if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { + locations->SetInAt(4, Location::RequiresRegister()); + } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM (because that register + // is clobbered by ReadBarrierMarkRegX entry points). Get an extra + // temporary register from the register allocator. + locations->AddTemp(Location::RequiresRegister()); + } +} + +static void CheckPosition(ArmVIXLAssembler* assembler, + Location pos, + vixl32::Register input, + Location length, + SlowPathCodeARMVIXL* slow_path, + vixl32::Register temp, + bool length_is_input_length = false) { + // Where is the length in the Array? + const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + if (pos.IsConstant()) { + int32_t pos_const = Int32ConstantFrom(pos); + if (pos_const == 0) { + if (!length_is_input_length) { + // Check that length(input) >= length. + __ Ldr(temp, MemOperand(input, length_offset)); + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } + } else { + // Check that length(input) >= pos. 
+ __ Ldr(temp, MemOperand(input, length_offset)); + __ Subs(temp, temp, pos_const); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + vixl32::Register pos_reg = RegisterFrom(pos); + __ Cbnz(pos_reg, slow_path->GetEntryLabel()); + } else { + // Check that pos >= 0. + vixl32::Register pos_reg = RegisterFrom(pos); + __ Cmp(pos_reg, 0); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that pos <= length(input). + __ Ldr(temp, MemOperand(input, length_offset)); + __ Subs(temp, temp, pos_reg); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + vixl32::Register src = InputRegisterAt(invoke, 0); + Location src_pos = locations->InAt(1); + vixl32::Register dest = InputRegisterAt(invoke, 2); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Location temp1_loc = locations->GetTemp(0); + vixl32::Register temp1 = RegisterFrom(temp1_loc); + Location temp2_loc = locations->GetTemp(1); + vixl32::Register temp2 = RegisterFrom(temp2_loc); + Location temp3_loc = locations->GetTemp(2); + vixl32::Register temp3 = RegisterFrom(temp3_loc); + + SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); + + vixl32::Label conditions_on_positions_validated; + SystemArrayCopyOptimizations optimizations(invoke); + + // If source and destination are the same, we go to slow path if we need to do + // forward copying. + if (src_pos.IsConstant()) { + int32_t src_pos_constant = Int32ConstantFrom(src_pos); + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ Cmp(src, dest); + __ B(eq, intrinsic_slow_path->GetEntryLabel()); + } + + // Checked when building locations. 
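Taken together, the branches emitted by CheckPosition above reduce to a single predicate; any violation branches to the intrinsic slow path. When the copy length is known to equal the source length, the same predicate forces pos == 0, which is exactly the Cbnz shortcut in the length_is_input_length case. A minimal sketch (illustrative, not ART code):

  #include <cstdint>

  inline bool PositionIsValid(int32_t pos, int32_t input_length, int32_t copy_length) {
    return pos >= 0 &&
           pos <= input_length &&
           (input_length - pos) >= copy_length;
  }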
+ DCHECK(!optimizations.GetDestinationIsSource() + || (src_pos_constant >= Int32ConstantFrom(dest_pos))); + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); + __ B(ne, &conditions_on_positions_validated); + } + __ Cmp(RegisterFrom(dest_pos), src_pos_constant); + __ B(gt, intrinsic_slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); + __ B(ne, &conditions_on_positions_validated); + } + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); + __ Cmp(RegisterFrom(src_pos), dest_pos_constant); + } else { + __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos)); + } + __ B(lt, intrinsic_slow_path->GetEntryLabel()); + } + + __ Bind(&conditions_on_positions_validated); + + if (!optimizations.GetSourceIsNotNull()) { + // Bail out if the source is null. + __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { + // Bail out if the destination is null. + __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); + } + + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant() && + !optimizations.GetCountIsSourceLength() && + !optimizations.GetCountIsDestinationLength()) { + __ Cmp(RegisterFrom(length), 0); + __ B(lt, intrinsic_slow_path->GetEntryLabel()); + } + + // Validity checks: source. + CheckPosition(assembler, + src_pos, + src, + length, + intrinsic_slow_path, + temp1, + optimizations.GetCountIsSourceLength()); + + // Validity checks: dest. + CheckPosition(assembler, + dest_pos, + dest, + length, + intrinsic_slow_path, + temp1, + optimizations.GetCountIsDestinationLength()); + + if (!optimizations.GetDoesNotNeedTypeCheck()) { + // Check whether all elements of the source array are assignable to the component + // type of the destination array. We do two checks: the classes are the same, + // or the destination is Object[]. If none of these checks succeed, we go to the + // slow path. + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ Ldrh(temp1, MemOperand(temp1, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + } + + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. 
+ // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, MemOperand(temp2, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl32::Label do_copy; + __ B(eq, &do_copy); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ Ldr(temp1, MemOperand(temp1, super_offset)); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(ne, intrinsic_slow_path->GetEntryLabel()); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ Ldr(temp1, MemOperand(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ Ldr(temp2, MemOperand(src, class_offset)); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. + assembler->MaybeUnpoisonHeapReference(temp1); + assembler->MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, MemOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. 
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ Ldr(temp3, MemOperand(temp2, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl32::Label do_copy; + __ B(eq, &do_copy); + if (!did_unpoison) { + assembler->MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ Ldr(temp1, MemOperand(temp1, component_offset)); + assembler->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Ldr(temp1, MemOperand(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(ne, intrinsic_slow_path->GetEntryLabel()); + } + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp3` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ Ldr(temp1, MemOperand(src, class_offset)); + assembler->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, MemOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + } + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + // Compute the base source address in `temp1`. + if (src_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(src_pos); + __ Add(temp1, src, element_size * constant + offset); + } else { + __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift)); + __ Add(temp1, temp1, offset); + } + + // Compute the end source address in `temp3`. 
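Stepping back from the address computation for a moment: the element type check emitted above (in both the Baker read barrier and plain variants) boils down to the rule the comments describe, sketched here with hypothetical fields rather than ART's mirror API. Both arrays must hold references, and either they share a class or, when the typed-Object[] optimization applies, the destination's component type is java.lang.Object (its super class is null).

  struct ArrayClassInfo {                    // hypothetical stand-in, for illustration only
    const void* klass;                       // the array's own class
    bool is_object_array;                    // has a component type that is not primitive
    const void* component_super_class;       // null when the component type is java.lang.Object
  };

  inline bool ElementTypeCheckPasses(const ArrayClassInfo& src, const ArrayClassInfo& dest) {
    if (!src.is_object_array || !dest.is_object_array) {
      return false;                          // primitive arrays take the slow path here
    }
    if (src.klass == dest.klass) {
      return true;                           // identical element types are always assignable
    }
    return dest.component_super_class == nullptr;  // destination is Object[]
  }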
+ if (length.IsConstant()) { + int32_t constant = Int32ConstantFrom(length); + __ Add(temp3, temp1, element_size * constant); + } else { + __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift)); + } + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + vixl32::Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ Cmp(temp1, temp3); + __ B(eq, &done); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(temp2, MemOperand(src, monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ Add(src, src, Operand(temp2, vixl32::LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARMVIXL* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ B(cs, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(temp2, dest, element_size * constant + offset); + } else { + __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(temp2, temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + + __ Cmp(temp1, temp3); + __ B(ne, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Compute the base destination address in `temp2`. 
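The LSRS-based test above simply moves one bit of the lock word into the carry flag; as a scalar predicate it is nothing more than the sketch below (the shift amount is left symbolic since the LockWord layout is defined elsewhere; the static_asserts above pin GrayState() to 1 and WhiteState() to 0).

  #include <cstdint>

  inline bool IsGray(uint32_t lock_word, uint32_t rb_state_shift) {
    return ((lock_word >> rb_state_shift) & 1u) != 0u;  // 1 == gray, 0 == white
  }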
+ if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(temp2, dest, element_size * constant + offset); + } else { + __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(temp2, temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl32::Label loop, done; + __ Cmp(temp1, temp3); + __ B(eq, &done); + __ Bind(&loop); + + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + + __ Cmp(temp1, temp3); + __ B(ne, &loop); + __ Bind(&done); + } + + // We only need one card marking on the destination array. + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); + + __ Bind(intrinsic_slow_path->GetExitLabel()); +} + +static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { + // If the graph is debuggable, all callee-saved floating-point registers are blocked by + // the code generator. Furthermore, the register allocator creates fixed live intervals + // for all caller-saved registers because we are doing a function call. As a result, if + // the input and output locations are unallocated, the register allocator runs out of + // registers and fails; however, a debuggable graph is not the common case. + if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { + return; + } + + DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); + DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + + LocationSummary* const locations = new (arena) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + const InvokeRuntimeCallingConventionARMVIXL calling_convention; + + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + // Native code uses the soft float ABI. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { + // If the graph is debuggable, all callee-saved floating-point registers are blocked by + // the code generator. Furthermore, the register allocator creates fixed live intervals + // for all caller-saved registers because we are doing a function call. As a result, if + // the input and output locations are unallocated, the register allocator runs out of + // registers and fails; however, a debuggable graph is not the common case. + if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { + return; + } + + DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); + DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + + LocationSummary* const locations = new (arena) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + const InvokeRuntimeCallingConventionARMVIXL calling_convention; + + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + // Native code uses the soft float ABI. 
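The "soft float ABI" comments here mean that the runtime entrypoints invoked by these intrinsics take and return double values in core-register pairs, which is exactly what the Vmov pairs in GenFPToFPCall/GenFPFPToFPCall below perform. A host-side illustration of the same 64-bit split (an illustrative helper, not ART code):

  #include <cstdint>
  #include <cstring>

  inline void SplitDouble(double value, uint32_t* lo, uint32_t* hi) {
    uint64_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    *lo = static_cast<uint32_t>(bits);         // travels in the low core register
    *hi = static_cast<uint32_t>(bits >> 32);   // travels in the high core register
  }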
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3))); +} + +static void GenFPToFPCall(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + QuickEntrypointEnum entry) { + LocationSummary* const locations = invoke->GetLocations(); + + DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); + DCHECK(locations->WillCall() && locations->Intrinsified()); + + // Native code uses the soft float ABI. + __ Vmov(RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + InputDRegisterAt(invoke, 0)); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); + __ Vmov(OutputDRegister(invoke), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1))); +} + +static void GenFPFPToFPCall(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + QuickEntrypointEnum entry) { + LocationSummary* const locations = invoke->GetLocations(); + + DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); + DCHECK(locations->WillCall() && locations->Intrinsified()); + + // Native code uses the soft float ABI. + __ Vmov(RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + InputDRegisterAt(invoke, 0)); + __ Vmov(RegisterFrom(locations->GetTemp(2)), + RegisterFrom(locations->GetTemp(3)), + InputDRegisterAt(invoke, 1)); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); + __ Vmov(OutputDRegister(invoke), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh); +} + +void 
IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi 
= HighRegisterFrom(locations->InAt(0)); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + __ Rbit(out_reg_lo, in_reg_hi); + __ Rbit(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + __ Rev(out_reg_lo, in_reg_hi); + __ Rev(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) { + DCHECK(Primitive::IsIntOrLongType(type)) << type; + DCHECK_EQ(instr->GetType(), Primitive::kPrimInt); + DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type); + + bool is_long = type == Primitive::kPrimLong; + LocationSummary* locations = instr->GetLocations(); + Location in = locations->InAt(0); + vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in); + vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0; + vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0)); + vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0)); + vixl32::Register out_r = OutputRegister(instr); + + // Move data from core register(s) to temp D-reg for bit count calculation, then move back. + // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg, + // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency, + // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'. 
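The sequence that follows computes the population count with VCNT (per-byte bit count) and widening pairwise adds (VPADDL.U8/U16, plus U32 for the long case). A scalar C++ equivalent of what those instructions compute, offered as a reference sketch rather than the generated code:

  #include <cstdint>

  inline int BitCount(uint64_t value, bool is_long) {
    if (!is_long) {
      value &= 0xffffffffu;        // the int case only reads back the low-word result
    }
    int count = 0;
    for (int byte = 0; byte < 8; ++byte) {
      uint8_t b = static_cast<uint8_t>(value >> (8 * byte));
      while (b != 0) {             // per-byte count: what VCNT.8 produces per lane
        count += b & 1;
        b >>= 1;
      }
    }
    return count;                  // the VPADDL steps just sum these per-byte counts
  }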
+ __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0| + __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c| + __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c| + __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c| + if (is_long) { + __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c| + } + __ Vmov(out_r, tmp_s); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(invoke, Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) { + VisitIntegerBitCount(invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(invoke, Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + // Location of data in char array buffer. + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + // Location of char array data in string. + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + + // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); + // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. + vixl32::Register srcObj = InputRegisterAt(invoke, 0); + vixl32::Register srcBegin = InputRegisterAt(invoke, 1); + vixl32::Register srcEnd = InputRegisterAt(invoke, 2); + vixl32::Register dstObj = InputRegisterAt(invoke, 3); + vixl32::Register dstBegin = InputRegisterAt(invoke, 4); + + vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1)); + vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2)); + + vixl32::Label done, compressed_string_loop; + // dst to be copied. + __ Add(dst_ptr, dstObj, data_offset); + __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1)); + + __ Subs(num_chr, srcEnd, srcBegin); + // Early out for valid zero-length retrievals. + __ B(eq, &done); + + // src range to copy. + __ Add(src_ptr, srcObj, value_offset); + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp; + vixl32::Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. 
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + temp = temps.Acquire(); + // String's length. + __ Ldr(temp, MemOperand(srcObj, count_offset)); + __ Cmp(temp, 0); + temps.Release(temp); + __ B(lt, &compressed_string_preloop); + } + __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); + + // Do the copy. + vixl32::Label loop, remainder; + + temp = temps.Acquire(); + // Save repairing the value of num_chr on the < 4 character path. + __ Subs(temp, num_chr, 4); + __ B(lt, &remainder); + + // Keep the result of the earlier subs, we are going to fetch at least 4 characters. + __ Mov(num_chr, temp); + + // Main loop used for longer fetches loads and stores 4x16-bit characters at a time. + // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code + // to rectify these everywhere this intrinsic applies.) + __ Bind(&loop); + __ Ldr(temp, MemOperand(src_ptr, char_size * 2)); + __ Subs(num_chr, num_chr, 4); + __ Str(temp, MemOperand(dst_ptr, char_size * 2)); + __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); + __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); + temps.Release(temp); + __ B(ge, &loop); + + __ Adds(num_chr, num_chr, 4); + __ B(eq, &done); + + // Main loop for < 4 character case and remainder handling. Loads and stores one + // 16-bit Java character at a time. + __ Bind(&remainder); + temp = temps.Acquire(); + __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex)); + __ Subs(num_chr, num_chr, 1); + __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); + temps.Release(temp); + __ B(gt, &remainder); + __ B(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_preloop); + __ Add(src_ptr, src_ptr, srcBegin); + __ Bind(&compressed_string_loop); + temp = temps.Acquire(); + __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex)); + __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); + temps.Release(temp); + __ Subs(num_chr, num_chr, 1); + __ B(gt, &compressed_string_loop); + } + + __ Bind(&done); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { + ArmVIXLAssembler* const assembler = GetAssembler(); + const vixl32::Register out = OutputRegister(invoke); + // Shifting left by 1 bit makes the value encodable as an immediate operand; + // we don't care about the sign bit anyway. + constexpr uint32_t infinity = kPositiveInfinityFloat << 1U; + + __ Vmov(out, InputSRegisterAt(invoke, 0)); + // We don't care about the sign bit, so shift left. + __ Lsl(out, out, 1); + __ Eor(out, out, infinity); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. 
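The comments above describe the whole IsInfinite trick, which the Lsr that follows completes. Spelled out on the host it looks like the sketch below, using the IEEE-754 single-precision +infinity pattern 0x7f800000 in place of kPositiveInfinityFloat and __builtin_clz in place of the CLZ instruction (which, unlike the builtin, is defined to return 32 for a zero input):

  #include <cstdint>
  #include <cstring>

  inline int IsInfiniteFloat(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    uint32_t x = (bits << 1) ^ (0x7f800000u << 1);  // drop the sign bit, XOR with shifted +inf
    uint32_t clz = (x == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(x));
    return static_cast<int>(clz >> 5);              // 1 only when x == 0, i.e. f was +/-infinity
  }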
+ __ Lsr(out, out, 5); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { + ArmVIXLAssembler* const assembler = GetAssembler(); + const vixl32::Register out = OutputRegister(invoke); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + // The highest 32 bits of double precision positive infinity separated into + // two constants encodable as immediate operands. + constexpr uint32_t infinity_high = 0x7f000000U; + constexpr uint32_t infinity_high2 = 0x00f00000U; + + static_assert((infinity_high | infinity_high2) == + static_cast<uint32_t>(kPositiveInfinityDouble >> 32U), + "The constants do not add up to the high 32 bits of double " + "precision positive infinity."); + __ Vmov(temp, out, InputDRegisterAt(invoke, 0)); + __ Eor(out, out, infinity_high); + __ Eor(out, out, infinity_high2); + // We don't care about the sign bit, so shift left. + __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); +} + +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. +UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit) + +// 1.8. +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) + +UNREACHABLE_INTRINSICS(ARMVIXL) + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h new file mode 100644 index 0000000000..6e79cb76a1 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ + +#include "intrinsics.h" +#include "utils/arm/assembler_arm_vixl.h" + +namespace art { + +namespace arm { + +class ArmVIXLAssembler; +class CodeGeneratorARMVIXL; + +class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen); + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + ArmVIXLAssembler* assembler_; + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL); +}; + +class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArenaAllocator* GetAllocator(); + ArmVIXLAssembler* GetAssembler(); + + CodeGeneratorARMVIXL* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index aae3899847..43682c5633 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3200,7 +3200,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // for (size_t i = 0; i != length; ++i) { @@ -3222,14 +3222,13 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // goto slow_path; // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index cdef22f6de..de2606c327 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1399,7 +1399,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // do { @@ -1420,14 +1420,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // goto slow_path; // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index b91e9e6868..15e605971e 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -33,11 +33,11 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { public: ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos) { is_singleton_ = true; - is_singleton_and_not_returned_ = true; + is_singleton_and_non_escaping_ = true; if (!reference_->IsNewInstance() && !reference_->IsNewArray()) { // For references not allocated in the method, don't assume anything. 
is_singleton_ = false; - is_singleton_and_not_returned_ = false; + is_singleton_and_non_escaping_ = false; return; } @@ -50,7 +50,7 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { // BoundType shouldn't normally be necessary for a NewInstance. // Just be conservative for the uncommon cases. is_singleton_ = false; - is_singleton_and_not_returned_ = false; + is_singleton_and_non_escaping_ = false; return; } if (user->IsPhi() || user->IsSelect() || user->IsInvoke() || @@ -62,21 +62,37 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap. // reference_ isn't the only name that can refer to its value anymore. is_singleton_ = false; - is_singleton_and_not_returned_ = false; + is_singleton_and_non_escaping_ = false; return; } if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) || (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) { - // The field is accessed in an unresolved way. We mark the object as a singleton to - // disable load/store optimizations on it. + // The field is accessed in an unresolved way. We mark the object as a non-singleton + // to disable load/store optimizations on it. // Note that we could optimize this case and still perform some optimizations until // we hit the unresolved access, but disabling is the simplest. is_singleton_ = false; - is_singleton_and_not_returned_ = false; + is_singleton_and_non_escaping_ = false; return; } if (user->IsReturn()) { - is_singleton_and_not_returned_ = false; + is_singleton_and_non_escaping_ = false; + } + } + + if (!is_singleton_ || !is_singleton_and_non_escaping_) { + return; + } + + // Look at Environment uses and if it's for HDeoptimize, it's treated the same + // as a return which escapes at the end of executing the compiled code. We don't + // do store elimination for singletons that escape through HDeoptimize. + // Other Environment uses are fine since LSE is disabled for debuggable. + for (const HUseListNode<HEnvironment*>& use : reference_->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder()->IsDeoptimize()) { + is_singleton_and_non_escaping_ = false; + break; } } } @@ -96,17 +112,22 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { return is_singleton_; } - // Returns true if reference_ is a singleton and not returned to the caller. + // Returns true if reference_ is a singleton and not returned to the caller or + // used as an environment local of an HDeoptimize instruction. // The allocation and stores into reference_ may be eliminated for such cases. - bool IsSingletonAndNotReturned() const { - return is_singleton_and_not_returned_; + bool IsSingletonAndNonEscaping() const { + return is_singleton_and_non_escaping_; } private: HInstruction* const reference_; const size_t position_; // position in HeapLocationCollector's ref_info_array_. bool is_singleton_; // can only be referred to by a single name in the method. - bool is_singleton_and_not_returned_; // reference_ is singleton and not returned to caller. + + // reference_ is singleton and does not escape in the end either by + // returning to the caller, or being used as an environment local of an + // HDeoptimize instruction. 
+ bool is_singleton_and_non_escaping_; DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); }; @@ -202,8 +223,7 @@ class HeapLocationCollector : public HGraphVisitor { kArenaAllocLSE), has_heap_stores_(false), has_volatile_(false), - has_monitor_operations_(false), - may_deoptimize_(false) {} + has_monitor_operations_(false) {} size_t GetNumberOfHeapLocations() const { return heap_locations_.size(); @@ -236,13 +256,6 @@ class HeapLocationCollector : public HGraphVisitor { return has_monitor_operations_; } - // Returns whether this method may be deoptimized. - // Currently we don't have meta data support for deoptimizing - // a method that eliminates allocations/stores. - bool MayDeoptimize() const { - return may_deoptimize_; - } - // Find and return the heap location index in heap_locations_. size_t FindHeapLocationIndex(ReferenceInfo* ref_info, size_t offset, @@ -493,10 +506,6 @@ class HeapLocationCollector : public HGraphVisitor { CreateReferenceInfoForReferenceType(instruction); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE { - may_deoptimize_ = true; - } - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { has_monitor_operations_ = true; } @@ -508,7 +517,6 @@ class HeapLocationCollector : public HGraphVisitor { // alias analysis and won't be as effective. bool has_volatile_; // If there are volatile field accesses. bool has_monitor_operations_; // If there are monitor operations. - bool may_deoptimize_; // Only true for HDeoptimize with single-frame deoptimization. DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); }; @@ -663,27 +671,59 @@ class LSEVisitor : public HGraphVisitor { if (predecessors.size() == 0) { return; } + ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { - HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i]; - heap_values[i] = pred0_value; - if (pred0_value != kUnknownHeapValue) { - for (size_t j = 1; j < predecessors.size(); j++) { - HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i]; - if (pred_value != pred0_value) { - heap_values[i] = kUnknownHeapValue; - break; - } + HInstruction* merged_value = nullptr; + // Whether merged_value is a result that's merged from all predecessors. + bool from_all_predecessors = true; + ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* singleton_ref = nullptr; + if (ref_info->IsSingletonAndNonEscaping()) { + // We do more analysis of liveness when merging heap values for such + // cases since stores into such references may potentially be eliminated. + singleton_ref = ref_info->GetReference(); + } + + for (HBasicBlock* predecessor : predecessors) { + HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if ((singleton_ref != nullptr) && + !singleton_ref->GetBlock()->Dominates(predecessor)) { + // singleton_ref is not live in this predecessor. Skip this predecessor since + // it does not really have the location. + DCHECK_EQ(pred_value, kUnknownHeapValue); + from_all_predecessors = false; + continue; + } + if (merged_value == nullptr) { + // First seen heap value. + merged_value = pred_value; + } else if (pred_value != merged_value) { + // There are conflicting values. + merged_value = kUnknownHeapValue; + break; } } - if (heap_values[i] == kUnknownHeapValue) { + if (merged_value == kUnknownHeapValue) { + // There are conflicting heap values from different predecessors. 
// Keep the last store in each predecessor since future loads cannot be eliminated. - for (size_t j = 0; j < predecessors.size(); j++) { - ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()]; + for (HBasicBlock* predecessor : predecessors) { + ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; KeepIfIsStore(pred_values[i]); } } + + if ((merged_value == nullptr) || !from_all_predecessors) { + DCHECK(singleton_ref != nullptr); + DCHECK((singleton_ref->GetBlock() == block) || + !singleton_ref->GetBlock()->Dominates(block)); + // singleton_ref is not defined before block or defined only in some of its + // predecessors, so block doesn't really have the location at its entry. + heap_values[i] = kUnknownHeapValue; + } else { + heap_values[i] = merged_value; + } } } @@ -812,8 +852,7 @@ class LSEVisitor : public HGraphVisitor { } else if (index != nullptr) { // For array element, don't eliminate stores since it can be easily aliased // with non-constant index. - } else if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned()) { + } else if (ref_info->IsSingletonAndNonEscaping()) { // Store into a field of a singleton that's not returned. The value cannot be // killed due to aliasing/invocation. It can be redundant since future loads can // directly get the value set by this instruction. The value can still be killed due to @@ -987,8 +1026,7 @@ class LSEVisitor : public HGraphVisitor { // new_instance isn't used for field accesses. No need to process it. return; } - if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned() && + if (ref_info->IsSingletonAndNonEscaping() && !new_instance->IsFinalizable() && !new_instance->NeedsAccessCheck()) { singleton_new_instances_.push_back(new_instance); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index d157509758..a9fe209063 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -16,11 +16,16 @@ #include "locations.h" +#include <type_traits> + #include "nodes.h" #include "code_generator.h" namespace art { +// Verify that Location is trivially copyable. +static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable"); + LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind, bool intrinsified) diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index da27928ef2..52747c0cc4 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -91,12 +91,9 @@ class Location : public ValueObject { DCHECK(!IsValid()); } - Location(const Location& other) : value_(other.value_) {} + Location(const Location& other) = default; - Location& operator=(const Location& other) { - value_ = other.value_; - return *this; - } + Location& operator=(const Location& other) = default; bool IsConstant() const { return (value_ & kLocationConstantMask) == kConstant; @@ -328,7 +325,6 @@ class Location : public ValueObject { LOG(FATAL) << "Should not use this location kind"; } UNREACHABLE(); - return "?"; } // Unallocated locations. 
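The locations.cc/locations.h hunks above replace the hand-written copy constructor and copy assignment of Location with "= default" and add a compile-time check that the type stays trivially copyable. The following standalone C++ sketch uses a hypothetical PackedValue type (not ART's Location) to illustrate the same pattern: defaulted copy operations keep a small value wrapper trivially copyable, and the static_assert turns any accidental regression into a build error.

// Illustrative sketch only; PackedValue is a made-up type, not part of the patch.
#include <cstdint>
#include <type_traits>

class PackedValue {
 public:
  explicit PackedValue(uintptr_t value) : value_(value) {}
  // Defaulted copy operations keep the type trivially copyable.
  PackedValue(const PackedValue& other) = default;
  PackedValue& operator=(const PackedValue& other) = default;

  uintptr_t Raw() const { return value_; }

 private:
  uintptr_t value_;
};

static_assert(std::is_trivially_copyable<PackedValue>::value,
              "PackedValue should be trivially copyable");

int main() {
  PackedValue a(42u);
  PackedValue b = a;  // Trivial (memcpy-like) copy.
  return b.Raw() == 42u ? 0 : 1;
}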
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 51be1d1e91..f4616e39e6 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -28,6 +28,30 @@ static void RemoveFromCycle(HInstruction* instruction) { instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false); } +// Detect a goto block and sets succ to the single successor. +static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) { + if (block->GetPredecessors().size() == 1 && + block->GetSuccessors().size() == 1 && + block->IsSingleGoto()) { + *succ = block->GetSingleSuccessor(); + return true; + } + return false; +} + +// Detect an early exit loop. +static bool IsEarlyExit(HLoopInformation* loop_info) { + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) { + for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + return true; + } + } + } + return false; +} + // // Class methods. // @@ -168,7 +192,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { int32_t use_count = 0; if (IsPhiInduction(phi) && IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) && - TryReplaceWithLastValue(phi, use_count, preheader)) { + // No uses, or no early-exit with proper replacement. + (use_count == 0 || + (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) { for (HInstruction* i : *iset_) { RemoveFromCycle(i); } @@ -178,31 +204,57 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { } void HLoopOptimization::SimplifyBlocks(LoopNode* node) { - for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - // Remove instructions that are dead. - for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) { - HInstruction* instruction = i.Current(); - if (instruction->IsDeadAndRemovable()) { - block->RemoveInstruction(instruction); + // Repeat the block simplifications until no more changes occur. Note that since + // each simplification consists of eliminating code (without introducing new code), + // this process is always finite. + bool changed; + do { + changed = false; + // Iterate over all basic blocks in the loop-body. + for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + // Remove dead instructions from the loop-body. + for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) { + HInstruction* instruction = i.Current(); + if (instruction->IsDeadAndRemovable()) { + changed = true; + block->RemoveInstruction(instruction); + } } - } - // Remove trivial control flow blocks from the loop-body. - if (block->GetPredecessors().size() == 1 && - block->GetSuccessors().size() == 1 && - block->GetFirstInstruction()->IsGoto()) { - HBasicBlock* pred = block->GetSinglePredecessor(); - HBasicBlock* succ = block->GetSingleSuccessor(); - if (succ->GetPredecessors().size() == 1) { + // Remove trivial control flow blocks from the loop-body. + HBasicBlock* succ = nullptr; + if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) { + // Trivial goto block can be removed. + HBasicBlock* pred = block->GetSinglePredecessor(); + changed = true; pred->ReplaceSuccessor(block, succ); - block->ClearDominanceInformation(); - block->SetDominator(pred); // needed by next disconnect. 
+ block->RemoveDominatedBlock(succ); block->DisconnectAndDelete(); pred->AddDominatedBlock(succ); succ->SetDominator(pred); + } else if (block->GetSuccessors().size() == 2) { + // Trivial if block can be bypassed to either branch. + HBasicBlock* succ0 = block->GetSuccessors()[0]; + HBasicBlock* succ1 = block->GetSuccessors()[1]; + HBasicBlock* meet0 = nullptr; + HBasicBlock* meet1 = nullptr; + if (succ0 != succ1 && + IsGotoBlock(succ0, &meet0) && + IsGotoBlock(succ1, &meet1) && + meet0 == meet1 && // meets again + meet0 != block && // no self-loop + meet0->GetPhis().IsEmpty()) { // not used for merging + changed = true; + succ0->DisconnectAndDelete(); + if (block->Dominates(meet0)) { + block->RemoveDominatedBlock(meet0); + succ1->AddDominatedBlock(meet0); + meet0->SetDominator(succ1); + } + } } } - } + } while (changed); } void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { @@ -240,12 +292,12 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { if (IsEmptyHeader(header) && IsEmptyBody(body) && IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) && - TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) { + // No uses, or proper replacement. + (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) { body->DisconnectAndDelete(); exit->RemovePredecessor(header); header->RemoveSuccessor(exit); - header->ClearDominanceInformation(); - header->SetDominator(preheader); // needed by next disconnect. + header->RemoveDominatedBlock(exit); header->DisconnectAndDelete(); preheader->AddSuccessor(exit); preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator @@ -259,22 +311,23 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { bool HLoopOptimization::IsPhiInduction(HPhi* phi) { ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { + DCHECK(iset_->empty()); for (HInstruction* i : *set) { - // Check that, other than phi, instruction are removable with uses contained in the cycle. - // TODO: investigate what cases are no longer in the graph. - if (i != phi) { - if (!i->IsInBlock() || !i->IsRemovable()) { - return false; - } + // Check that, other than instructions that are no longer in the graph (removed earlier) + // each instruction is removable and, other than the phi, uses are contained in the cycle. + if (!i->IsInBlock()) { + continue; + } else if (!i->IsRemovable()) { + return false; + } else if (i != phi) { for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; } } } + iset_->insert(i); // copy } - DCHECK(iset_->empty()); - iset_->insert(set->begin(), set->end()); // copy return true; } return false; @@ -358,20 +411,16 @@ void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* } } -bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, - int32_t use_count, - HBasicBlock* block) { - // If true uses appear after the loop, replace these uses with the last value. Environment - // uses can consume this value too, since any first true use is outside the loop (although - // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only - // environment uses, the value is dropped altogether, since the computations have no effect. 
- if (use_count > 0) { - if (!induction_range_.CanGenerateLastValue(instruction)) { - return false; - } +bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) { + // Try to replace outside uses with the last value. Environment uses can consume this + // value too, since any first true use is outside the loop (although this may imply + // that de-opting may look "ahead" a bit on the phi value). If there are only environment + // uses, the value is dropped altogether, since the computations have no effect. + if (induction_range_.CanGenerateLastValue(instruction)) { ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block)); + return true; } - return true; + return false; } } // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index e18d17531e..3391bef4e9 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -72,9 +72,7 @@ class HLoopOptimization : public HOptimization { HInstruction* instruction, /*out*/ int32_t* use_count); void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement); - bool TryReplaceWithLastValue(HInstruction* instruction, - int32_t use_count, - HBasicBlock* block); + bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); // Range information based on prior induction variable analysis. InductionVarRange induction_range_; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6a45149509..a946e319c7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1855,6 +1855,15 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { size_t InputCount() const { return GetInputRecords().size(); } HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); } + bool HasInput(HInstruction* input) const { + for (const HInstruction* i : GetInputs()) { + if (i == input) { + return true; + } + } + return false; + } + void SetRawInputAt(size_t index, HInstruction* input) { SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input)); } @@ -5683,7 +5692,10 @@ class HLoadString FINAL : public HInstruction { // all other types are unavailable. kDexCacheViaMethod, - kLast = kDexCacheViaMethod + // Load from the root table associated with the JIT compiled method. 
+ kJitTableAddress, + + kLast = kJitTableAddress, }; HLoadString(HCurrentMethod* current_method, @@ -5741,7 +5753,8 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimeAddress || load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageAddress) { + load_kind == LoadKind::kBootImageAddress || + load_kind == LoadKind::kJitTableAddress) { return false; } return !IsInDexCache(); @@ -5794,7 +5807,8 @@ class HLoadString FINAL : public HInstruction { return load_kind == LoadKind::kBootImageLinkTimeAddress || load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBssEntry || - load_kind == LoadKind::kDexCacheViaMethod; + load_kind == LoadKind::kDexCacheViaMethod || + load_kind == LoadKind::kJitTableAddress; } static bool HasAddress(LoadKind load_kind) { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 19fd6f95c3..1add660f1b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -117,6 +117,7 @@ class CodeVectorAllocator FINAL : public CodeAllocator { size_t GetSize() const { return size_; } const ArenaVector<uint8_t>& GetMemory() const { return memory_; } + uint8_t* GetData() { return memory_.data(); } private: ArenaVector<uint8_t> memory_; @@ -755,8 +756,10 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, "dead_code_elimination$initial"); HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, "dead_code_elimination$after_inlining"); + HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination( graph, stats, "dead_code_elimination$final"); - HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); HConstantFolding* fold2 = new (arena) HConstantFolding( @@ -795,6 +798,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, select_generator, fold2, // TODO: if we don't inline we can also skip fold2. simplify2, + dce2, side_effects, gvn, licm, @@ -804,7 +808,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, lse, - dce2, + dce3, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. 
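The RunOptimizations hunk above renames the first constant-folding pass, inserts a dead_code_elimination$after_inlining pass right after the post-inlining simplifier, and keeps a final DCE pass (dce3) near the end of the list. As a rough illustration of that "named passes executed strictly in list order" structure, here is a minimal C++ sketch; Graph, Pass, RunPasses and the lambda bodies are placeholders for this sketch, not ART's real pass-manager API.

// Illustrative sketch only; not part of the patch.
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Graph {};  // Hypothetical stand-in for the compiler IR.

// Each pass carries a name (handy for stats/timing dumps) and a callback.
using Pass = std::pair<std::string, std::function<void(Graph*)>>;

// Runs the passes strictly in the order they were listed.
void RunPasses(Graph* graph, const std::vector<Pass>& passes) {
  for (const Pass& pass : passes) {
    std::cout << "running " << pass.first << '\n';
    pass.second(graph);  // Each pass mutates the graph in place.
  }
}

int main() {
  Graph graph;
  std::vector<Pass> passes = {
      {"dead_code_elimination$initial", [](Graph*) { /* prune dead code */ }},
      {"constant_folding", [](Graph*) { /* fold constants */ }},
      {"dead_code_elimination$after_inlining", [](Graph*) { /* prune again */ }},
      {"dead_code_elimination$final", [](Graph*) { /* final cleanup */ }},
  };
  RunPasses(&graph, passes);
  return 0;
}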
@@ -1123,7 +1127,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr) { - StackHandleScope<2> hs(self); + StackHandleScope<3> hs(self); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( method->GetDeclaringClass()->GetClassLoader())); Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); @@ -1169,22 +1173,43 @@ bool OptimizingCompiler::JitCompile(Thread* self, } size_t stack_map_size = codegen->ComputeStackMapsSize(); - uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size, method); - if (stack_map_data == nullptr) { + size_t number_of_roots = codegen->GetNumberOfJitRoots(); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots + // will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is + // executed, this array is not needed. + Handle<mirror::ObjectArray<mirror::Object>> roots( + hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc( + self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots))); + if (roots.Get() == nullptr) { + // Out of memory, just clear the exception to avoid any Java exception uncaught problems. + DCHECK(self->IsExceptionPending()); + self->ClearException(); + return false; + } + uint8_t* stack_map_data = nullptr; + uint8_t* roots_data = nullptr; + code_cache->ReserveData( + self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data); + if (stack_map_data == nullptr || roots_data == nullptr) { return false; } MaybeRecordStat(MethodCompilationStat::kCompiled); codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item); + codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data, dex_cache); + const void* code = code_cache->CommitCode( self, method, stack_map_data, + roots_data, codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), code_allocator.GetSize(), - osr); + osr, + roots); if (code == nullptr) { code_cache->ClearData(self, stack_map_data); diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index caf66474eb..5991791a15 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -374,7 +374,9 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { if (current->GetType() == Primitive::kPrimNot) { DCHECK(interval->GetDefinedBy()->IsActualObject()) << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); + << '(' << interval->GetDefinedBy()->GetId() << ')' + << "@" << safepoint_position->GetInstruction()->DebugName() + << '(' << safepoint_position->GetInstruction()->GetId() << ')'; LocationSummary* locations = safepoint_position->GetLocations(); if (current->GetParent()->HasSpillSlot()) { locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index fd1db592bb..13e449431c 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -281,7 +281,8 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); if (codegen_->GetCompilerOptions().IsBootImage()) { - // Compiling boot image. Resolve the string and allocate it if needed. + // Compiling boot image. Resolve the string and allocate it if needed, to ensure + // the string will be added to the boot image. DCHECK(!runtime->UseJitCompilation()); mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache); CHECK(string != nullptr); @@ -297,10 +298,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } else if (runtime->UseJitCompilation()) { // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); - mirror::String* string = dex_cache->GetResolvedString(string_index); - if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; - address = reinterpret_cast64<uint64_t>(string); + mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache); + if (string != nullptr) { + if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + address = reinterpret_cast64<uint64_t>(string); + } else { + desired_load_kind = HLoadString::LoadKind::kJitTableAddress; + } } } else { // AOT app compilation. Try to lookup the string without allocating if not found. @@ -322,6 +327,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kJitTableAddress: load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index); break; case HLoadString::LoadKind::kBootImageAddress: |
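The sharpening.cc hunk above makes the JIT path look up the string without resolving it, then choose between two load kinds: a string already sitting in the boot image can be loaded through its raw address, while any other resolved string goes through the new per-method JIT root table (kJitTableAddress). The sketch below mirrors that decision with hypothetical ResolvedString/LoadKind types; the kDexCacheViaMethod fallback for an unresolved string is an assumption made for this sketch, not taken from the patch.

// Illustrative sketch only; types are hypothetical, not ART's mirror::String
// or HLoadString::LoadKind.
#include <cstdint>

enum class LoadKind { kBootImageAddress, kJitTableAddress, kDexCacheViaMethod };

struct ResolvedString {
  uintptr_t address;   // Where the string object lives.
  bool in_boot_image;  // Whether that address is inside the boot image.
};

// A boot-image string can be baked in as a raw address; any other resolved
// string is published via the per-method JIT root table. Unresolved strings
// fall back to the runtime lookup path (assumed fallback for this sketch).
LoadKind ChooseJitStringLoadKind(const ResolvedString* string) {
  if (string == nullptr) {
    return LoadKind::kDexCacheViaMethod;
  }
  return string->in_boot_image ? LoadKind::kBootImageAddress
                               : LoadKind::kJitTableAddress;
}

int main() {
  ResolvedString boot_string{0x1000u, true};
  ResolvedString app_string{0x2000u, false};
  bool ok = ChooseJitStringLoadKind(&boot_string) == LoadKind::kBootImageAddress &&
            ChooseJitStringLoadKind(&app_string) == LoadKind::kJitTableAddress &&
            ChooseJitStringLoadKind(nullptr) == LoadKind::kDexCacheViaMethod;
  return ok ? 0 : 1;
}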