Diffstat (limited to 'compiler/optimizing')
22 files changed, 3226 insertions, 193 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d2357a5d05..7dc094b25f 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -548,7 +548,21 @@ class BCEVisitor : public HGraphVisitor { void VisitBasicBlock(HBasicBlock* block) OVERRIDE { DCHECK(!IsAddedBlock(block)); first_index_bounds_check_map_.clear(); - HGraphVisitor::VisitBasicBlock(block); + // Visit phis and instructions using a safe iterator. The iteration protects + // against deleting the current instruction during iteration. However, it + // must advance next_ if that instruction is deleted during iteration. + for (HInstruction* instruction = block->GetFirstPhi(); instruction != nullptr;) { + DCHECK(instruction->IsInBlock()); + next_ = instruction->GetNext(); + instruction->Accept(this); + instruction = next_; + } + for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { + DCHECK(instruction->IsInBlock()); + next_ = instruction->GetNext(); + instruction->Accept(this); + instruction = next_; + } // We should never deoptimize from an osr method, otherwise we might wrongly optimize // code dominated by the deoptimization. if (!GetGraph()->IsCompilingOsr()) { @@ -1798,7 +1812,12 @@ class BCEVisitor : public HGraphVisitor { } /** Helper method to replace an instruction with another instruction. */ - static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + // Safe iteration. + if (instruction == next_) { + next_ = next_->GetNext(); + } + // Replace and remove. instruction->ReplaceWith(replacement); instruction->GetBlock()->RemoveInstruction(instruction); } @@ -1831,6 +1850,9 @@ class BCEVisitor : public HGraphVisitor { // Range analysis based on induction variables. InductionVarRange induction_range_; + // Safe iteration. + HInstruction* next_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index be65f89ef1..7cab97d2e5 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -638,6 +638,11 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -894,6 +899,11 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. 
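The bounds_check_elimination.cc hunk above replaces the generic HGraphVisitor block visit with a "safe iteration" that caches the successor in next_, and ReplaceInstruction advances that cursor whenever the instruction it removes happens to be the cached one. A minimal sketch of the pattern, using a hypothetical singly linked Node type rather than the real HInstruction API:

struct Node {
  Node* next = nullptr;
};

class SafeListVisitor {
 public:
  template <typename Fn>
  void Visit(Node* head, Fn&& visit_fn) {
    for (Node* node = head; node != nullptr; node = next_) {
      next_ = node->next;   // Cache the successor before visiting.
      visit_fn(node);       // The visit may delete `node`, or even delete `next_`.
    }
  }

  // Mirrors BCEVisitor::ReplaceInstruction: if the node being removed is the
  // cached successor, step the cursor past it first so the loop stays valid.
  void NotifyRemoval(Node* node) {
    if (node == next_) {
      next_ = next_->next;
    }
  }

 private:
  Node* next_ = nullptr;
};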
+ DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -4841,8 +4851,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (type) { case Primitive::kPrimBoolean: @@ -4915,6 +4923,11 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimNot: { + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -5055,8 +5068,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Location value_loc = locations->InAt(2); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (value_type) { case Primitive::kPrimBoolean: @@ -5306,8 +5317,6 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -5322,9 +5331,6 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* Location first = locations->InAt(0); Location second = locations->InAt(1); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); - if (second.IsRegister()) { __ add(out.AsRegister<Register>(), first.AsRegister<Register>(), @@ -6843,7 +6849,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -6919,14 +6925,13 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i } AddSlowPath(slow_path); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. 
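In the ARM fast path above, the read barrier state is a single bit of the lock word at LockWord::kReadBarrierStateShift, with white == 0 and gray == 1 (as the static_asserts just below require), so shifting right by kReadBarrierStateShift + 1 with LSRS drops exactly that bit into the carry flag, and the CS branch enters the mark slow path only for gray objects. A scalar sketch of the same test (the shift value is a placeholder, not the real LockWord constant):

#include <cstdint>

constexpr uint32_t kReadBarrierStateShift = 28;  // placeholder for LockWord::kReadBarrierStateShift

// True when the object is gray, i.e. its references may still need marking;
// this is the bit the LSRS sequence shifts out into the carry flag.
inline bool LockWordIsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0;  // 1 == gray, 0 == white
}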
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1); __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS. __ Bind(slow_path->GetExitLabel()); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b53750966d..d868984387 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -626,6 +626,11 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -876,7 +881,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. DCHECK(!(instruction_->IsArrayGet() && instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); @@ -2192,8 +2199,6 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( } void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); @@ -2201,10 +2206,7 @@ void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instr locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64::VisitIntermediateAddress( - HIntermediateAddress* instruction) { - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); +void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { __ Add(OutputRegister(instruction), InputRegisterAt(instruction, 0), Operand(InputOperandAt(instruction, 1))); @@ -2304,11 +2306,15 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); + // The read barrier instrumentation of object ArrayGet instructions + // does not support the HIntermediateAddress instruction. 
+ DCHECK(!((type == Primitive::kPrimNot) && + instruction->GetArray()->IsIntermediateAddress() && + kEmitCompilerReadBarrier)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. Register temp = temps.AcquireW(); - // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. - DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -2341,9 +2347,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -2451,9 +2454,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (instruction->GetArray()->IsIntermediateAddress()) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -5426,7 +5426,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -5517,12 +5517,11 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* } AddSlowPath(slow_path); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index c00ee553ef..e69528e43f 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -23,6 +23,7 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics_arm_vixl.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -1474,19 +1475,36 @@ void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* i // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - // TODO(VIXL): TryDispatch + IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } + return; + } HandleInvoke(invoke); // TODO(VIXL): invoke->HasPcRelativeDexCache() } +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARMVIXL intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} + void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { // Explicit clinit checks triggered by static invokes must have been pruned by // art::PrepareForRegisterAllocation. 
DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - // TODO(VIXL): TryGenerateIntrinsicCode + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } LocationSummary* locations = invoke->GetLocations(); DCHECK(locations->HasTemps()); @@ -1502,13 +1520,18 @@ void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO(VIXL): TryDispatch + IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } HandleInvoke(invoke); } void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO(VIXL): TryGenerateIntrinsicCode + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); @@ -5039,6 +5062,39 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + vixl::aarch32::Register obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location temp ATTRIBUTE_UNUSED, + bool needs_null_check ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + vixl::aarch32::Register obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED, + ScaleFactor scale_factor ATTRIBUTE_UNUSED, + Location temp ATTRIBUTE_UNUSED, + bool needs_null_check ATTRIBUTE_UNUSED, + bool always_update_field ATTRIBUTE_UNUSED, + vixl::aarch32::Register* temp2 ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + +void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, + Location out ATTRIBUTE_UNUSED, + Location ref ATTRIBUTE_UNUSED, + Location obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); +} + void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, Location out, Location ref ATTRIBUTE_UNUSED, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index efd33c7025..2f946e4263 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -7093,7 +7093,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -7111,14 +7111,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
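On x86 the same low-bit test is done below with a one-byte testb against the lock word, so the bit position is split into a byte offset and a bit index within that byte. A worked example of that arithmetic, again with a placeholder shift value:

#include <cstdint>

constexpr uint32_t kBitsPerByte = 8;
constexpr uint32_t kReadBarrierStateShift = 28;  // placeholder value for the sketch

constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // 28 / 8 == 3
constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // 28 % 8 == 4
constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);     // 0x10

// The emitted fast path is then roughly: testb [obj + monitor_offset + 3], 0x10,
// followed by a conditional jump to the mark slow path when the bit is set.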
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index fcabeeae5d..232c3b3cbb 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5740,7 +5740,19 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { +static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + // We need a temporary for holding the iftable length. + return true; + } + return kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + +static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5778,7 +5790,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for // some cases. - if (TypeCheckNeedsATemporary(type_check_kind)) { + if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } @@ -5791,7 +5803,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? 
locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -5809,7 +5821,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -5970,33 +5986,45 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } } -void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); +bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; + return !throws_into_catch && !kEmitCompilerReadBarrier; + case TypeCheckKind::kInterfaceCheck: + return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; + return false; } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { + bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch); + LocationSummary::CallKind call_kind = is_fatal_slow_path + ? LocationSummary::kNoCall + : LocationSummary::kCallOnSlowPath; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + // Require a register for the interface check since there is a loop that compares the class to + // a memory address. + locations->SetInAt(1, Location::RequiresRegister()); + } else { + locations->SetInAt(1, Location::Any()); + } + // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (TypeCheckNeedsATemporary(type_check_kind)) { + if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } @@ -6009,20 +6037,19 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? 
locations->GetTemp(1) : Location::NoLocation(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); - uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const int object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); bool is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); + IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock()); SlowPathCode* type_check_slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, is_type_check_slow_path_fatal); @@ -6039,8 +6066,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -6063,8 +6093,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop, compare_classes; @@ -6083,7 +6116,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6107,8 +6144,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // Walk over the class hierarchy to find a match. NearLabel loop; __ Bind(&loop); @@ -6133,7 +6173,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6152,8 +6196,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); // Do an exact check. NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { @@ -6180,7 +6227,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6188,7 +6239,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6197,17 +6252,15 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: NearLabel done; + // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); __ j(kEqual, &done); } - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); - // We always go into the type check slow path for the unresolved - // and interface check cases. + // We always go into the type check slow path for the unresolved case. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6223,6 +6276,53 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // case of the `switch` code as it was previously (with a direct // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. + + // Fast path for the interface check. Since we compare with a memory location in the inner + // loop we would need to have cls poisoned. However unpoisoning cls would reset the + // conditional flags and cause the conditional jump to be incorrect. + if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + // Try to avoid read barriers to improve the fast path. We can not get false positives by + // doing this. 
+ // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + /*emit_read_barrier*/ false); + + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + /*emit_read_barrier*/ false); + NearLabel is_null; + // Null iftable means it is empty. + __ testl(temp_loc.AsRegister<CpuRegister>(), temp_loc.AsRegister<CpuRegister>()); + __ j(kZero, &is_null); + + // Loop through the iftable and check if any class matches. + __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), + Address(temp_loc.AsRegister<CpuRegister>(), array_length_offset)); + + NearLabel start_loop; + __ Bind(&start_loop); + __ cmpl(cls.AsRegister<CpuRegister>(), + Address(temp_loc.AsRegister<CpuRegister>(), object_array_data_offset)); + __ j(kEqual, &done); // Return if same class. + // Go to next interface. + __ addq(temp_loc.AsRegister<CpuRegister>(), Immediate(2 * kHeapReferenceSize)); + __ subq(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); + __ j(kNotZero, &start_loop); + __ Bind(&is_null); + } + + // Since we clobbered temp_loc holding the class, we need to reload it. + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kEmitCompilerReadBarrier); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6397,10 +6497,11 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset) { + uint32_t offset, + bool emit_read_barrier) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); - if (kEmitCompilerReadBarrier) { + if (emit_read_barrier) { if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6535,7 +6636,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering // HeapReference<Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } @@ -6553,14 +6654,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
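The interface-check fast path emitted above scans the class's iftable as a flat object array whose entries come in (interface class, method array) pairs, which is why both the address and the remaining count advance by two references per iteration; a null iftable is treated as empty before the loop. A compilable sketch of the same scan over raw compressed references (HeapRef is a hypothetical stand-in, not an ART type):

#include <cstdint>

struct HeapRef { uint32_t value; };  // hypothetical compressed heap reference

// Returns true when `cls` matches one of the interface entries, mirroring the
// cmpl/addq/subq loop above; a false result falls through to the slow path.
bool IfTableContains(const HeapRef* iftable_data, int32_t iftable_length, HeapRef cls) {
  for (int32_t i = 0; i < iftable_length; i += 2) {  // stride of one (class, methods) pair
    if (iftable_data[i].value == cls.value) {
      return true;
    }
  }
  return false;
}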
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // ref = ReadBarrier::Mark(ref); // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 8b19dad0d0..5a6dc54e7a 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -248,7 +248,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset); + uint32_t offset, + bool emit_read_barrier); // Generate a GC root reference load: // // root <- *address diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 782110c40a..9b54511340 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -48,7 +48,7 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { class InstructionSimplifierArm : public HOptimization { public: InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} + : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index f71684efe9..d4cb1f14b7 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -82,9 +82,10 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { class InstructionSimplifierArm64 : public HOptimization { public: InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} - static constexpr const char* kInstructionSimplifierArm64PassName - = "instruction_simplifier_arm64"; + : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} + + static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; + void Run() OVERRIDE { InstructionSimplifierArm64Visitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 04e063c92e..c2b1374f62 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -231,15 +231,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* 
array, HInstruction* index, size_t data_offset) { - if (kEmitCompilerReadBarrier) { - // The read barrier instrumentation does not support the - // HIntermediateAddress instruction yet. - // - // TODO: Handle this case properly in the ARM64 and ARM code generator and - // re-enable this optimization; otherwise, remove this TODO. - // b/26601270 - return false; - } if (index->IsConstant() || (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { // When the index is a constant all the addressing can be fitted in the @@ -251,14 +242,20 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. return false; } + if (kEmitCompilerReadBarrier && + access->IsArrayGet() && + access->GetType() == Primitive::kPrimNot) { + // For object arrays, the read barrier instrumentation requires + // the original array pointer. + return false; + } // Proceed to extract the base address computation. HGraph* graph = access->GetBlock()->GetGraph(); ArenaAllocator* arena = graph->GetArena(); HIntConstant* offset = graph->GetIntConstant(data_offset); - HIntermediateAddress* address = - new (arena) HIntermediateAddress(array, offset, kNoDexPc); + HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc); // TODO: Is it ok to not have this on the intermediate address? // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 8790c1e4f1..93a2340a32 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1945,7 +1945,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // do { @@ -1986,9 +1986,8 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); // Carry flag is the last bit shifted out by LSRS. 
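TryExtractArrayAccessAddress above factors the array-base-plus-data-offset part of an array access into a single HIntermediateAddress node that several accesses can share, and the new early return keeps object ArrayGet under read barriers out of this optimization because that instrumentation needs the original array pointer. A sketch of the address split in plain integer arithmetic (not the ART IR):

#include <cstddef>
#include <cstdint>

// Shared once per array: the part HIntermediateAddress computes.
inline uintptr_t IntermediateAddress(uintptr_t array, size_t data_offset) {
  return array + data_offset;
}

// Per access: what the ArrayGet/ArraySet addressing mode then adds on top.
inline uintptr_t ElementAddress(uintptr_t intermediate, size_t index, size_t element_size) {
  return intermediate + index * element_size;
}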
__ b(read_barrier_slow_path->GetEntryLabel(), CS); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index db1c022868..47e6d9699d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2659,7 +2659,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // do { @@ -2704,9 +2704,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); // Fast-path copy. diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc new file mode 100644 index 0000000000..6ff0ca4eab --- /dev/null +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -0,0 +1,2694 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_arm_vixl.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_arm_vixl.h" +#include "common_arm.h" +#include "lock_word.h" +#include "mirror/array-inl.h" + +#include "aarch32/constants-aarch32.h" + +namespace art { +namespace arm { + +#define __ assembler->GetVIXLAssembler()-> + +using helpers::DRegisterFrom; +using helpers::HighRegisterFrom; +using helpers::InputDRegisterAt; +using helpers::InputRegisterAt; +using helpers::InputSRegisterAt; +using helpers::InputVRegisterAt; +using helpers::Int32ConstantFrom; +using helpers::LocationFrom; +using helpers::LowRegisterFrom; +using helpers::LowSRegisterFrom; +using helpers::OutputDRegister; +using helpers::OutputRegister; +using helpers::OutputVRegister; +using helpers::RegisterFrom; +using helpers::SRegisterFrom; + +using namespace vixl::aarch32; // NOLINT(build/namespaces) + +ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() { + return codegen_->GetAssembler(); +} + +ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an +// intrinsified call. 
This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored! +// +// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially +// sub-optimal (compared to a direct pointer call), but this is a slow-path. + +class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke) + : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {} + + Location MoveArguments(CodeGenerator* codegen) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor); + return calling_convention_visitor.GetMethodLocation(); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler()); + __ Bind(GetEntryLabel()); + + SaveLiveRegisters(codegen, invoke_->GetLocations()); + + Location method_loc = MoveArguments(codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + } else { + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + } + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + codegen->MoveFromReturnRegister(out, invoke_->GetType()); + } + + RestoreLiveRegisters(codegen, invoke_->GetLocations()); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL); +}; + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
+class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) + : SlowPathCodeARMVIXL(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + vixl32::Register dest = InputRegisterAt(instruction_, 2); + Location dest_pos = locations->InAt(3); + vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1)); + vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2)); + vixl32::Register tmp = RegisterFrom(locations->GetTemp(3)); + + __ Bind(GetEntryLabel()); + // Compute the base destination address in `dst_curr_addr`. + if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(dst_curr_addr, dest, element_size * constant + offset); + } else { + __ Add(dst_curr_addr, + dest, + Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(dst_curr_addr, dst_curr_addr, offset); + } + + vixl32::Label loop; + __ Bind(&loop); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + assembler->MaybeUnpoisonHeapReference(tmp); + // TODO: Inline the mark bit check before calling the runtime? + // tmp = ReadBarrier::Mark(tmp); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more + // explanations.) + DCHECK(!tmp.IsSP()); + DCHECK(!tmp.IsLR()); + DCHECK(!tmp.IsPC()); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK(!src_curr_addr.Is(ip)); + DCHECK(!dst_curr_addr.Is(ip)); + DCHECK(!src_stop_addr.Is(ip)); + DCHECK(!tmp.Is(ip)); + DCHECK(tmp.IsRegister()) << tmp; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); + // This runtime call does not require a stack map. 
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + assembler->MaybePoisonHeapReference(tmp); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(ne, &loop); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierSystemArrayCopySlowPathARMVIXL"; + } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL); +}; + +IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen) + : arena_(codegen->GetGraph()->GetArena()), + assembler_(codegen->GetAssembler()), + features_(codegen->GetInstructionSetFeatures()) {} + +bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + return res->Intrinsified(); +} + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input)); + } else { + __ Vmov(RegisterFrom(output), SRegisterFrom(input)); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input)); + } else { + __ Vmov(SRegisterFrom(output), RegisterFrom(input)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} +void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = 
new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void GenNumberOfLeadingZeros(LocationSummary* locations, + Primitive::Type type, + ArmVIXLAssembler* assembler) { + Location in = locations->InAt(0); + vixl32::Register out = RegisterFrom(locations->Out()); + + DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + + if (type == Primitive::kPrimLong) { + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Label end; + __ Clz(out, in_reg_hi); + __ Cbnz(in_reg_hi, &end); + __ Clz(out, in_reg_lo); + __ Add(out, out, 32); + __ Bind(&end); + } else { + __ Clz(out, RegisterFrom(in)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void GenNumberOfTrailingZeros(LocationSummary* locations, + Primitive::Type type, + ArmVIXLAssembler* assembler) { + DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + + vixl32::Register out = RegisterFrom(locations->Out()); + + if (type == Primitive::kPrimLong) { + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + vixl32::Label end; + __ Rbit(out, in_reg_lo); + __ Clz(out, out); + __ Cbnz(in_reg_lo, &end); + __ Rbit(out, in_reg_hi); + __ Clz(out, out); + __ Add(out, out, 32); + __ Bind(&end); + } else { + vixl32::Register in = RegisterFrom(locations->InAt(0)); + __ Rbit(out, in); + __ Clz(out, out); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); 
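GenNumberOfLeadingZeros and GenNumberOfTrailingZeros above assemble the 64-bit results from two 32-bit halves, relying on ARM's CLZ returning 32 for a zero input. A scalar model of that composition (the zero cases are spelled out because __builtin_clz and __builtin_ctz are undefined for 0):

#include <cstdint>

int NumberOfLeadingZerosLong(uint32_t lo, uint32_t hi) {
  // Clz(hi); if hi == 0, Clz(lo) + 32, matching the Clz/Cbnz/Add sequence above.
  if (hi != 0) return __builtin_clz(hi);
  return 32 + (lo != 0 ? __builtin_clz(lo) : 32);
}

int NumberOfTrailingZerosLong(uint32_t lo, uint32_t hi) {
  // Rbit+Clz(lo); if lo == 0, Rbit+Clz(hi) + 32.
  if (lo != 0) return __builtin_ctz(lo);
  return 32 + (hi != 0 ? __builtin_ctz(hi) : 32);
}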
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { + __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmVIXLAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + + if (is64bit) { + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, mask, out_reg_lo); + __ Eor(out_reg_hi, mask, out_reg_hi); + } else { + vixl32::Register in_reg = RegisterFrom(in); + vixl32::Register out_reg = RegisterFrom(output); + + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, mask, out_reg); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); +} + +static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { + vixl32::Register op1 = InputRegisterAt(invoke, 0); + vixl32::Register op2 = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + __ Cmp(op1, op2); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? 
ge : le, out, op2); + } +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke, /* is_min */ true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke, /* is_min */ false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out()); + vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out()); + if (addr.Is(lo)) { + __ Ldr(hi, MemOperand(addr, 4)); + __ Ldr(lo, addr); + } else { + __ Ldr(lo, addr); + __ Ldr(hi, MemOperand(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
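// ---- Editorial aside: illustrative sketch, not part of this change ----
// MemoryPeekLongNative above splits the 64-bit load into two 32-bit LDRs
// because the peeked address may be unaligned and LDRD may fault on it. The
// loads are ordered so that if the register allocator placed the address in
// the same register as the low half of the result, the high word is loaded
// first and the address is not clobbered before the second load. Roughly
// (little-endian, as on ARM):
#include <cstdint>
#include <cstring>
static uint64_t PeekLong(const unsigned char* addr) {
  uint32_t lo, hi;
  std::memcpy(&lo, addr, sizeof(lo));      // Ldr lo, [addr]      (order swapped when addr aliases lo)
  std::memcpy(&hi, addr + 4, sizeof(hi));  // Ldr hi, [addr, #4]
  return (static_cast<uint64_t>(hi) << 32) | lo;
}
// ------------------------------------------------------------------------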
+ __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0)); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr); + __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Ldr(OutputRegister(invoke), + MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value())); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARMVIXL* codegen) { + LocationSummary* locations = invoke->GetLocations(); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + Location base_loc = locations->InAt(1); + vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. + Location offset_loc = locations->InAt(2); + vixl32::Register offset = LowRegisterFrom(offset_loc); // Long offset, lo part only. 
+ Location trg_loc = locations->Out(); + + switch (type) { + case Primitive::kPrimInt: { + vixl32::Register trg = RegisterFrom(trg_loc); + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + break; + } + + case Primitive::kPrimNot: { + vixl32::Register trg = RegisterFrom(trg_loc); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + } else { + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ Ldr(trg, MemOperand(base, offset)); + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + assembler->MaybeUnpoisonHeapReference(trg); + } + break; + } + + case Primitive::kPrimLong: { + vixl32::Register trg_lo = LowRegisterFrom(trg_loc); + vixl32::Register trg_hi = HighRegisterFrom(trg_loc); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset)); + } else { + __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset)); + } + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); + LocationSummary* locations = new (arena) LocationSummary(invoke, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
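// ---- Editorial aside: illustrative sketch, not part of this change ----
// GenUnsafeGet above lowers a volatile get to a plain load followed by
// DMB ISH. A rough C++ analogue (the DMB is a full barrier; the acquire fence
// is only the closest portable equivalent):
#include <atomic>
#include <cstdint>
static int32_t UnsafeGetIntVolatile(const std::atomic<int32_t>* field) {
  int32_t value = field->load(std::memory_order_relaxed);  // Ldr(trg, MemOperand(base, offset))
  std::atomic_thread_fence(std::memory_order_acquire);     // Dmb(vixl32::ISH)
  return value;
}
// Note: only the low word of the jlong offset is used (LowRegisterFrom above),
// and object references additionally go through the read barrier paths shown.
// ------------------------------------------------------------------------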
+ } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + + vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer. + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 
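// ---- Editorial aside: illustrative sketch, not part of this change ----
// The body of GenUnsafePut that follows mirrors the get: a DMB ISH is emitted
// before the store for both volatile and ordered puts, and a second DMB ISH
// follows the store only when the put is volatile. A rough C++ analogue
// (the fences are approximations of the full DMB ISH barriers):
#include <atomic>
#include <cstdint>
static void UnsafePutIntVolatile(std::atomic<int32_t>* field, int32_t value) {
  std::atomic_thread_fence(std::memory_order_release);  // leading Dmb(vixl32::ISH)
  field->store(value, std::memory_order_relaxed);       // Str(source, MemOperand(base, offset))
  std::atomic_thread_fence(std::memory_order_seq_cst);  // trailing Dmb(vixl32::ISH), volatile only
}
// ------------------------------------------------------------------------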
+ vixl32::Register value; + + if (is_volatile || is_ordered) { + __ Dmb(vixl32::ISH); + } + + if (type == Primitive::kPrimLong) { + vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3)); + vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3)); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Add(temp_reg, base, offset); + vixl32::Label loop_head; + __ Bind(&loop_head); + __ Ldrexd(temp_lo, temp_hi, temp_reg); + __ Strexd(temp_lo, value_lo, value_hi, temp_reg); + __ Cmp(temp_lo, 0); + __ B(ne, &loop_head); + } else { + __ Strd(value_lo, value_hi, MemOperand(base, offset)); + } + } else { + value = RegisterFrom(locations->InAt(3)); + vixl32::Register source = value; + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + __ Mov(temp, value); + assembler->PoisonHeapReference(temp); + source = temp; + } + __ Str(source, MemOperand(base, offset)); + } + + if (is_volatile) { + __ Dmb(vixl32::ISH); + } + + if (type == Primitive::kPrimNot) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + vixl32::Register card = RegisterFrom(locations->GetTemp(1)); + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(temp, card, base, value, value_can_be_null); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); +} + +static void 
CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); + LocationSummary* locations = new (arena) LocationSummary(invoke, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // If heap poisoning is enabled, we don't want the unpoisoning + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; + locations->SetOut(Location::RequiresRegister(), overlaps); + + // Temporary registers used in CAS. In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. +} + +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Location out_loc = locations->Out(); + vixl32::Register out = OutputRegister(invoke); // Boolean result. + + vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. + Location offset_loc = locations->InAt(2); + vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B). + vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected. + vixl32::Register value = InputRegisterAt(invoke, 4); // Value. + + Location tmp_ptr_loc = locations->GetTemp(0); + vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. + + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } + } + + // Prevent reordering with prior memory operations. 
+ // Emit a DMB ISH instruction instead of an DMB ISHST one, as the + // latter allows a preceding load to be delayed past the STXR + // instruction below. + __ Dmb(vixl32::ISH); + + __ Add(tmp_ptr, base, offset); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + vixl32::Label loop_head; + __ Bind(&loop_head); + + __ Ldrex(tmp, tmp_ptr); + + __ Subs(tmp, tmp, expected); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ itt(eq); + __ strex(eq, tmp, value, tmp_ptr); + __ cmp(eq, tmp, 1); + } + + __ B(eq, &loop_head); + + __ Dmb(vixl32::ISH); + + __ Rsbs(out, tmp, 1); + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(cc); + __ mov(cc, out, 0); + } + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke, Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + GenCas(invoke, Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + invoke->InputAt(1)->CanBeNull() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // Need temporary registers for String compression's feature. 
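// ---- Editorial aside: illustrative sketch, not part of this change ----
// The LDREX/STREX loop in GenCas above retries only while the store-exclusive
// itself fails, so its behaviour matches a strong compare-and-set with full
// barriers (DMB ISH) on both sides. Rough C++ analogue:
#include <atomic>
#include <cstdint>
static bool UnsafeCasInt(std::atomic<int32_t>* field, int32_t expected, int32_t value) {
  // A strong CAS never fails spuriously, matching the loop above, which
  // retries on STREX failure instead of reporting it as a mismatch.
  return field->compare_exchange_strong(expected, value, std::memory_order_seq_cst);
}
// ------------------------------------------------------------------------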
+ if (mirror::kUseStringCompression) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register str = InputRegisterAt(invoke, 0); + vixl32::Register arg = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + vixl32::Register temp3, temp4; + if (mirror::kUseStringCompression) { + temp3 = RegisterFrom(locations->GetTemp(3)); + temp4 = RegisterFrom(locations->GetTemp(4)); + } + + vixl32::Label loop; + vixl32::Label find_char_diff; + vixl32::Label end; + vixl32::Label different_compression; + + // Get offsets of count and value fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Take slow path and throw if input can be and is null. + SlowPathCodeARMVIXL* slow_path = nullptr; + const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); + if (can_slow_path) { + slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ Cbz(arg, slow_path->GetEntryLabel()); + } + + // Reference equality check, return 0 if same reference. + __ Subs(out, str, arg); + __ B(eq, &end); + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp_reg = temps.Acquire(); + + if (mirror::kUseStringCompression) { + // Load lengths of this and argument strings. + __ Ldr(temp3, MemOperand(str, count_offset)); + __ Ldr(temp4, MemOperand(arg, count_offset)); + // Clean out compression flag from lengths. + __ Bic(temp0, temp3, 0x80000000); + __ Bic(temp_reg, temp4, 0x80000000); + } else { + // Load lengths of this and argument strings. + __ Ldr(temp0, MemOperand(str, count_offset)); + __ Ldr(temp_reg, MemOperand(arg, count_offset)); + } + // out = length diff. + __ Subs(out, temp0, temp_reg); + // temp0 = min(len(str), len(arg)). + + { + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(gt); + __ mov(gt, temp0, temp_reg); + } + + temps.Release(temp_reg); + // Shorter string is empty? + __ Cbz(temp0, &end); + + if (mirror::kUseStringCompression) { + // Check if both strings using same compression style to use this comparison loop. + __ Eors(temp3, temp3, temp4); + __ B(mi, &different_compression); + } + // Store offset of string value in preparation for comparison loop. + __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. 
+ __ Cmp(temp4, 0); + + AssemblerAccurateScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ it(ge); + __ add(ge, temp0, temp0, temp0); + } + + // Assertions that must hold in order to compare multiple characters at a time. + CHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), + "String data must be 8-byte aligned for unrolled CompareTo loop."); + + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + vixl32::Label find_char_diff_2nd_cmp; + // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment). + __ Bind(&loop); + temp_reg = temps.Acquire(); + __ Ldr(temp_reg, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(temp_reg, temp2); + __ B(ne, &find_char_diff); + __ Add(temp1, temp1, char_size * 2); + + __ Ldr(temp_reg, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(temp_reg, temp2); + __ B(ne, &find_char_diff_2nd_cmp); + __ Add(temp1, temp1, char_size * 2); + // With string compression, we have compared 8 bytes, otherwise 4 chars. + __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); + __ B(hi, &loop); + __ B(&end); + + __ Bind(&find_char_diff_2nd_cmp); + if (mirror::kUseStringCompression) { + __ Subs(temp0, temp0, 4); // 4 bytes previously compared. + __ B(ls, &end); // Was the second comparison fully beyond the end? + } else { + // Without string compression, we can start treating temp0 as signed + // and rely on the signed comparison below. + __ Sub(temp0, temp0, 2); + } + + // Find the single character difference. + __ Bind(&find_char_diff); + // Get the bit position of the first character that differs. + __ Eor(temp1, temp2, temp_reg); + __ Rbit(temp1, temp1); + __ Clz(temp1, temp1); + + // temp0 = number of characters remaining to compare. + // (Without string compression, it could be < 1 if a difference is found by the second CMP + // in the comparison loop, and after the end of the shorter string data). + + // Without string compression (temp1 >> 4) = character where difference occurs between the last + // two words compared, in the interval [0,1]. + // (0 for low half-word different, 1 for high half-word different). + // With string compression, (temp1 << 3) = byte where the difference occurs, + // in the interval [0,3]. + + // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside + // the remaining string data, so just return length diff (out). + // The comparison is unsigned for string compression, otherwise signed. + __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); + __ B((mirror::kUseStringCompression ? ls : le), &end); + // Extract the characters and calculate the difference. 
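// ---- Editorial aside: illustrative sketch, not part of this change ----
// find_char_diff above works out which element of the mismatching word
// differs: XOR the two words, then RBIT + CLZ gives the bit index of the
// lowest set bit, which is shifted right by 4 (16-bit chars) or 3 (compressed
// 8-bit chars) before being compared against the remaining length. Scalar
// sketch for the 16-bit case:
#include <cstdint>
static int FirstDifferingChar(uint32_t a, uint32_t b) {
  uint32_t diff = a ^ b;          // Eor(temp1, temp2, temp_reg); diff != 0 on this path
  int bit = __builtin_ctz(diff);  // Rbit + Clz
  return bit >> 4;                // 0 or 1: which halfword of the word differs
}
// ------------------------------------------------------------------------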
+ vixl32::Label uncompressed_string, continue_process; + if (mirror::kUseStringCompression) { + __ Cmp(temp4, 0); + __ B(ge, &uncompressed_string); + __ Bic(temp1, temp1, 0x7); + __ B(&continue_process); + } + __ Bind(&uncompressed_string); + __ Bic(temp1, temp1, 0xf); + __ Bind(&continue_process); + + __ Lsr(temp2, temp2, temp1); + __ Lsr(temp_reg, temp_reg, temp1); + vixl32::Label calculate_difference, uncompressed_string_extract_chars; + if (mirror::kUseStringCompression) { + __ Cmp(temp4, 0); + __ B(ge, &uncompressed_string_extract_chars); + __ Ubfx(temp2, temp2, 0, 8); + __ Ubfx(temp_reg, temp_reg, 0, 8); + __ B(&calculate_difference); + } + __ Bind(&uncompressed_string_extract_chars); + __ Movt(temp2, 0); + __ Movt(temp_reg, 0); + __ Bind(&calculate_difference); + __ Sub(out, temp_reg, temp2); + temps.Release(temp_reg); + __ B(&end); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff; + // Comparison for different compression style. + // This part is when THIS is compressed and ARG is not. + __ Bind(&different_compression); + __ Add(temp2, str, value_offset); + __ Add(temp3, arg, value_offset); + __ Cmp(temp4, 0); + __ B(lt, &loop_arg_compressed); + + __ Bind(&loop_this_compressed); + temp_reg = temps.Acquire(); + __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex)); + __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex)); + __ Cmp(temp_reg, temp4); + __ B(ne, &find_diff); + __ Subs(temp0, temp0, 1); + __ B(gt, &loop_this_compressed); + __ B(&end); + + // This part is when THIS is not compressed and ARG is. + __ Bind(&loop_arg_compressed); + __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex)); + __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex)); + __ Cmp(temp_reg, temp4); + __ B(ne, &find_diff); + __ Subs(temp0, temp0, 1); + __ B(gt, &loop_arg_compressed); + __ B(&end); + + // Calculate the difference. + __ Bind(&find_diff); + __ Sub(out, temp_reg, temp4); + temps.Release(temp_reg); + } + + __ Bind(&end); + + if (can_slow_path) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. + // Using instruction cbz requires a low register, so explicitly set a temp to be R0. 
+ locations->AddTemp(LocationFrom(r0)); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register str = InputRegisterAt(invoke, 0); + vixl32::Register arg = InputRegisterAt(invoke, 1); + vixl32::Register out = OutputRegister(invoke); + + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + + vixl32::Label loop, preloop; + vixl32::Label end; + vixl32::Label return_true; + vixl32::Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + StringEqualsOptimizations optimizations(invoke); + if (!optimizations.GetArgumentNotNull()) { + // Check if input is null, return false if it is. + __ Cbz(arg, &return_false); + } + + if (!optimizations.GetArgumentIsString()) { + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Ldr(temp, MemOperand(str, class_offset)); + __ Ldr(temp1, MemOperand(arg, class_offset)); + __ Cmp(temp, temp1); + __ B(ne, &return_false); + } + + // Load lengths of this and argument strings. + __ Ldr(temp, MemOperand(str, count_offset)); + __ Ldr(temp1, MemOperand(arg, count_offset)); + // Check if lengths are equal, return false if they're not. + // Also compares the compression style, if differs return false. + __ Cmp(temp, temp1); + __ B(ne, &return_false); + // Return true if both strings are empty. + if (mirror::kUseStringCompression) { + // Length needs to be masked out first because 0 is treated as compressed. + __ Bic(temp, temp, 0x80000000); + } + __ Cbz(temp, &return_true); + // Reference equality check, return true if same reference. + __ Cmp(str, arg); + __ B(eq, &return_true); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); + + if (mirror::kUseStringCompression) { + // If not compressed, directly to fast compare. Else do preprocess on length. + __ Cmp(temp1, 0); + __ B(gt, &preloop); + // Mask out compression flag and adjust length for compressed string (8-bit) + // as if it is a 16-bit data, new_length = (length + 1) / 2. + __ Add(temp, temp, 1); + __ Lsr(temp, temp, 1); + __ Bind(&preloop); + } + // Loop to compare strings 2 characters at a time starting at the front of the string. + // Ok to do this because strings with an odd length are zero-padded. 
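// ---- Editorial aside: illustrative sketch, not part of this change ----
// The comparison loop that follows checks one 32-bit word per iteration, so a
// compressed (8-bit) string first has its character count converted to the
// equivalent number of 16-bit units, new_length = (length + 1) / 2; odd
// lengths are safe because the data is zero-padded. Scalar sketch:
#include <cstdint>
static bool EqualWords(const uint32_t* a, const uint32_t* b, int32_t halfword_count) {
  // One iteration per word, i.e. per two 16-bit units (Subs temp, temp, 2).
  for (int32_t i = 0; i * 2 < halfword_count; ++i) {
    if (a[i] != b[i]) return false;  // Ldr / Ldr / Cmp / B(ne, &return_false)
  }
  return true;
}
// ------------------------------------------------------------------------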
+ __ Mov(temp1, value_offset); + __ Bind(&loop); + __ Ldr(out, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Cmp(out, temp2); + __ B(ne, &return_false); + __ Add(temp1, temp1, sizeof(uint32_t)); + __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t)); + __ B(gt, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ Mov(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ Mov(out, 0); + __ Bind(&end); +} + +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. + SlowPathCodeARMVIXL* slow_path = nullptr; + HInstruction* code_point = invoke->InputAt(1); + if (code_point->IsIntConstant()) { + if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else if (code_point->GetType() != Primitive::kPrimChar) { + vixl32::Register char_reg = InputRegisterAt(invoke, 1); + // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`. + __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); + slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + __ B(hs, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0)); + DCHECK(tmp_reg.Is(r2)); + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(LocationFrom(r0)); + + // Need to send start-index=0. 
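// ---- Editorial aside: illustrative sketch, not part of this change ----
// For a non-constant code point, GenerateVisitStringIndexOf above tests
// `ch >= 0x10000` instead of `ch > 0xffff`: the two are equivalent for
// unsigned values, and 0x10000 encodes as a Thumb-2 modified immediate while
// 0xffff does not, so the Cmp needs no extra constant materialization.
#include <cstdint>
static bool NeedsIndexOfSlowPath(uint32_t code_point) {
  return code_point >= 0x10000u;  // same as code_point > 0xffffu for uint32_t
}
// ------------------------------------------------------------------------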
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::Register byte_array = InputRegisterAt(invoke, 0); + __ Cmp(byte_array, 0); + SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { + // No need to emit code checking whether `locations->InAt(2)` is a null + // pointer, as callers of the native method + // + // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) + // + // all include a null check on `data` before calling that method. 
+ codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainAndSlowPath, + kIntrinsified); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetOut(LocationFrom(r0)); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::Register string_to_copy = InputRegisterAt(invoke, 0); + __ Cmp(string_to_copy, 0); + SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); + + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); + LocationSummary* locations = invoke->GetLocations(); + if (locations == nullptr) { + return; + } + + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + + if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { + locations->SetInAt(1, Location::RequiresRegister()); + } + if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) { + locations->SetInAt(3, Location::RequiresRegister()); + } + if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { + locations->SetInAt(4, Location::RequiresRegister()); + } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM (because that register + // is clobbered by ReadBarrierMarkRegX entry points). Get an extra + // temporary register from the register allocator. + locations->AddTemp(Location::RequiresRegister()); + } +} + +static void CheckPosition(ArmVIXLAssembler* assembler, + Location pos, + vixl32::Register input, + Location length, + SlowPathCodeARMVIXL* slow_path, + vixl32::Register temp, + bool length_is_input_length = false) { + // Where is the length in the Array? + const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + if (pos.IsConstant()) { + int32_t pos_const = Int32ConstantFrom(pos); + if (pos_const == 0) { + if (!length_is_input_length) { + // Check that length(input) >= length. + __ Ldr(temp, MemOperand(input, length_offset)); + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } + } else { + // Check that length(input) >= pos. 
+ __ Ldr(temp, MemOperand(input, length_offset)); + __ Subs(temp, temp, pos_const); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + vixl32::Register pos_reg = RegisterFrom(pos); + __ Cbnz(pos_reg, slow_path->GetEntryLabel()); + } else { + // Check that pos >= 0. + vixl32::Register pos_reg = RegisterFrom(pos); + __ Cmp(pos_reg, 0); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that pos <= length(input). + __ Ldr(temp, MemOperand(input, length_offset)); + __ Subs(temp, temp, pos_reg); + __ B(lt, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ Cmp(temp, Int32ConstantFrom(length)); + } else { + __ Cmp(temp, RegisterFrom(length)); + } + __ B(lt, slow_path->GetEntryLabel()); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + vixl32::Register src = InputRegisterAt(invoke, 0); + Location src_pos = locations->InAt(1); + vixl32::Register dest = InputRegisterAt(invoke, 2); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Location temp1_loc = locations->GetTemp(0); + vixl32::Register temp1 = RegisterFrom(temp1_loc); + Location temp2_loc = locations->GetTemp(1); + vixl32::Register temp2 = RegisterFrom(temp2_loc); + Location temp3_loc = locations->GetTemp(2); + vixl32::Register temp3 = RegisterFrom(temp3_loc); + + SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); + + vixl32::Label conditions_on_positions_validated; + SystemArrayCopyOptimizations optimizations(invoke); + + // If source and destination are the same, we go to slow path if we need to do + // forward copying. + if (src_pos.IsConstant()) { + int32_t src_pos_constant = Int32ConstantFrom(src_pos); + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ Cmp(src, dest); + __ B(eq, intrinsic_slow_path->GetEntryLabel()); + } + + // Checked when building locations. 
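// ---- Editorial aside: illustrative sketch, not part of this change ----
// CheckPosition above encodes the usual System.arraycopy bounds checks; in
// scalar form the fast path requires all of the following (otherwise it
// branches to the intrinsic slow path):
#include <cstdint>
static bool PositionOk(int32_t pos, int32_t input_length, int32_t copy_length) {
  return pos >= 0 &&                          // Cmp(pos_reg, 0); B(lt, slow)
         input_length - pos >= 0 &&           // Subs(temp, temp, pos); B(lt, slow)
         input_length - pos >= copy_length;   // Cmp(temp, length); B(lt, slow)
}
// When `length_is_input_length` the copy can only succeed with pos == 0,
// hence the single Cbnz in that branch.
// ------------------------------------------------------------------------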
+ DCHECK(!optimizations.GetDestinationIsSource() + || (src_pos_constant >= Int32ConstantFrom(dest_pos))); + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); + __ B(ne, &conditions_on_positions_validated); + } + __ Cmp(RegisterFrom(dest_pos), src_pos_constant); + __ B(gt, intrinsic_slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Cmp(src, dest); + __ B(ne, &conditions_on_positions_validated); + } + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); + __ Cmp(RegisterFrom(src_pos), dest_pos_constant); + } else { + __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos)); + } + __ B(lt, intrinsic_slow_path->GetEntryLabel()); + } + + __ Bind(&conditions_on_positions_validated); + + if (!optimizations.GetSourceIsNotNull()) { + // Bail out if the source is null. + __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { + // Bail out if the destination is null. + __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); + } + + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant() && + !optimizations.GetCountIsSourceLength() && + !optimizations.GetCountIsDestinationLength()) { + __ Cmp(RegisterFrom(length), 0); + __ B(lt, intrinsic_slow_path->GetEntryLabel()); + } + + // Validity checks: source. + CheckPosition(assembler, + src_pos, + src, + length, + intrinsic_slow_path, + temp1, + optimizations.GetCountIsSourceLength()); + + // Validity checks: dest. + CheckPosition(assembler, + dest_pos, + dest, + length, + intrinsic_slow_path, + temp1, + optimizations.GetCountIsDestinationLength()); + + if (!optimizations.GetDoesNotNeedTypeCheck()) { + // Check whether all elements of the source array are assignable to the component + // type of the destination array. We do two checks: the classes are the same, + // or the destination is Object[]. If none of these checks succeed, we go to the + // slow path. + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ Ldrh(temp1, MemOperand(temp1, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + } + + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. 
+ // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, MemOperand(temp2, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl32::Label do_copy; + __ B(eq, &do_copy); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ Ldr(temp1, MemOperand(temp1, super_offset)); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(ne, intrinsic_slow_path->GetEntryLabel()); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ Ldr(temp1, MemOperand(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ Ldr(temp2, MemOperand(src, class_offset)); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. + assembler->MaybeUnpoisonHeapReference(temp1); + assembler->MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, MemOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. 
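// ---- Editorial aside: illustrative sketch, not part of this change ----
// The type check above admits the fast copy when the two array classes are
// identical, or, when the builder identified the destination as a typed
// Object[] candidate, when the destination's component type is
// java.lang.Object, i.e. has no superclass. Sketch with hypothetical field
// names (not ART's mirror::Class layout):
struct ClassRef {
  const ClassRef* component_type;
  const ClassRef* super_class;
};
static bool CanUseFastCopy(const ClassRef* src_klass,
                           const ClassRef* dest_klass,
                           bool dest_is_typed_object_array) {
  if (src_klass == dest_klass) return true;                    // Cmp(temp1, temp2); B(eq, do_copy)
  if (!dest_is_typed_object_array) return false;               // B(ne, slow_path)
  return dest_klass->component_type->super_class == nullptr;   // Ldr super_class; Cbnz slow_path
}
// ------------------------------------------------------------------------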
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ Ldr(temp3, MemOperand(temp2, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl32::Label do_copy; + __ B(eq, &do_copy); + if (!did_unpoison) { + assembler->MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ Ldr(temp1, MemOperand(temp1, component_offset)); + assembler->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Ldr(temp1, MemOperand(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(ne, intrinsic_slow_path->GetEntryLabel()); + } + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp3` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ Ldr(temp1, MemOperand(src, class_offset)); + assembler->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, MemOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + assembler->MaybeUnpoisonHeapReference(temp3); + } + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, MemOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + // Compute the base source address in `temp1`. + if (src_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(src_pos); + __ Add(temp1, src, element_size * constant + offset); + } else { + __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift)); + __ Add(temp1, temp1, offset); + } + + // Compute the end source address in `temp3`. 
+ if (length.IsConstant()) { + int32_t constant = Int32ConstantFrom(length); + __ Add(temp3, temp1, element_size * constant); + } else { + __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift)); + } + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + vixl32::Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ Cmp(temp1, temp3); + __ B(eq, &done); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(temp2, MemOperand(src, monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ Add(src, src, Operand(temp2, vixl32::LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARMVIXL* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ B(cs, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(temp2, dest, element_size * constant + offset); + } else { + __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(temp2, temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + + __ Cmp(temp1, temp3); + __ B(ne, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Compute the base destination address in `temp2`. 
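The Baker read barrier path above chooses between the fast copy and the slow path by testing a single bit of the source object's lock word, as the LSRS/carry trick and the static_asserts indicate. A scalar sketch of that test; the shift value below is an assumed placeholder standing in for LockWord::kReadBarrierStateShift:

```cpp
#include <cstdint>

// Assumed placeholder; in ART this is LockWord::kReadBarrierStateShift.
constexpr uint32_t kReadBarrierStateShift = 28;

// The generated LSRS shifts the lock word right by kReadBarrierStateShift + 1
// so that the read barrier state bit lands in the carry flag. In plain C++ the
// same decision is a test of that single bit: WhiteState() == 0 and
// GrayState() == 1 (see the static_asserts above), so the low bit suffices.
inline bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0;
}

// The copy then follows the pseudocode above:
//   if (IsGray(src_lock_word)) { /* slow path: ReadBarrier::Mark each element */ }
//   else                       { /* fast path: raw word-by-word copy          */ }
```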
+ if (dest_pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(dest_pos); + __ Add(temp2, dest, element_size * constant + offset); + } else { + __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); + __ Add(temp2, temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl32::Label loop, done; + __ Cmp(temp1, temp3); + __ B(eq, &done); + __ Bind(&loop); + + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + + __ Cmp(temp1, temp3); + __ B(ne, &loop); + __ Bind(&done); + } + + // We only need one card marking on the destination array. + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); + + __ Bind(intrinsic_slow_path->GetExitLabel()); +} + +static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { + // If the graph is debuggable, all callee-saved floating-point registers are blocked by + // the code generator. Furthermore, the register allocator creates fixed live intervals + // for all caller-saved registers because we are doing a function call. As a result, if + // the input and output locations are unallocated, the register allocator runs out of + // registers and fails; however, a debuggable graph is not the common case. + if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { + return; + } + + DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); + DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + + LocationSummary* const locations = new (arena) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + const InvokeRuntimeCallingConventionARMVIXL calling_convention; + + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + // Native code uses the soft float ABI. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { + // If the graph is debuggable, all callee-saved floating-point registers are blocked by + // the code generator. Furthermore, the register allocator creates fixed live intervals + // for all caller-saved registers because we are doing a function call. As a result, if + // the input and output locations are unallocated, the register allocator runs out of + // registers and fails; however, a debuggable graph is not the common case. + if (invoke->GetBlock()->GetGraph()->IsDebuggable()) { + return; + } + + DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); + DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + + LocationSummary* const locations = new (arena) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + const InvokeRuntimeCallingConventionARMVIXL calling_convention; + + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + // Native code uses the soft float ABI. 
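The "soft float ABI" comment above, and the core-register temporaries added just below, exist because the native math routines receive and return doubles in core-register pairs rather than in VFP registers. A sketch of the split/join that the VMOVs in GenFPToFPCall and GenFPFPToFPCall perform; the helper names are illustrative, not ART code:

```cpp
#include <cstdint>
#include <cstring>

// How a double maps onto the pair of core registers reserved as temporaries
// below (e.g. the first two calling-convention registers for argument 0).
struct CorePair {
  uint32_t lo;  // first register of the pair (low word on little-endian)
  uint32_t hi;  // second register of the pair (high word)
};

inline CorePair SplitDouble(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // what VMOV Rlo, Rhi, Dn does
  return {static_cast<uint32_t>(bits), static_cast<uint32_t>(bits >> 32)};
}

inline double JoinDouble(CorePair pair) {
  uint64_t bits = (static_cast<uint64_t>(pair.hi) << 32) | pair.lo;
  double value;
  std::memcpy(&value, &bits, sizeof(value));  // what VMOV Dn, Rlo, Rhi does
  return value;
}
```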
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3))); +} + +static void GenFPToFPCall(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + QuickEntrypointEnum entry) { + LocationSummary* const locations = invoke->GetLocations(); + + DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); + DCHECK(locations->WillCall() && locations->Intrinsified()); + + // Native code uses the soft float ABI. + __ Vmov(RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + InputDRegisterAt(invoke, 0)); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); + __ Vmov(OutputDRegister(invoke), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1))); +} + +static void GenFPFPToFPCall(HInvoke* invoke, + ArmVIXLAssembler* assembler, + CodeGeneratorARMVIXL* codegen, + QuickEntrypointEnum entry) { + LocationSummary* const locations = invoke->GetLocations(); + + DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); + DCHECK(locations->WillCall() && locations->Intrinsified()); + + // Native code uses the soft float ABI. + __ Vmov(RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + InputDRegisterAt(invoke, 0)); + __ Vmov(RegisterFrom(locations->GetTemp(2)), + RegisterFrom(locations->GetTemp(3)), + InputDRegisterAt(invoke, 1)); + codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); + __ Vmov(OutputDRegister(invoke), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1))); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh); +} + +void 
IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { + GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi 
= HighRegisterFrom(locations->InAt(0)); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + __ Rbit(out_reg_lo, in_reg_hi); + __ Rbit(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + __ Rev(out_reg_lo, in_reg_hi); + __ Rev(out_reg_hi, in_reg_lo); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0)); +} + +static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) { + DCHECK(Primitive::IsIntOrLongType(type)) << type; + DCHECK_EQ(instr->GetType(), Primitive::kPrimInt); + DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type); + + bool is_long = type == Primitive::kPrimLong; + LocationSummary* locations = instr->GetLocations(); + Location in = locations->InAt(0); + vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in); + vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0; + vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0)); + vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0)); + vixl32::Register out_r = OutputRegister(instr); + + // Move data from core register(s) to temp D-reg for bit count calculation, then move back. + // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg, + // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency, + // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'. 
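A scalar model of the VCNT/VPADDL sequence emitted just below may make the comment above easier to follow: count the set bits of each byte, then sum neighbouring lanes with widening pairwise adds. This is an illustrative helper, not ART code:

```cpp
#include <cstdint>

// VCNT.8 produces a per-byte popcount; the VPADDL chain sums neighbouring
// lanes with widening adds. For the int variant the 32-bit input is duplicated
// into both halves of the D register and the final pairwise add is skipped, so
// the low lane already holds the answer.
inline uint32_t BitCount64(uint64_t value) {
  uint32_t total = 0;
  for (int byte = 0; byte < 8; ++byte) {
    uint8_t b = static_cast<uint8_t>(value >> (byte * 8));
    uint32_t per_byte = 0;                  // VCNT.8: bits set in this byte
    while (b != 0) {
      per_byte += b & 1u;
      b >>= 1;
    }
    total += per_byte;                      // VPADDL chain: accumulate lanes
  }
  return total;
}
```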
+ __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0| + __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c| + __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c| + __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c| + if (is_long) { + __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c| + } + __ Vmov(out_r, tmp_s); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(invoke, Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) { + VisitIntegerBitCount(invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(invoke, Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { + ArmVIXLAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + // Location of data in char array buffer. + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + // Location of char array data in string. + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + + // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); + // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. + vixl32::Register srcObj = InputRegisterAt(invoke, 0); + vixl32::Register srcBegin = InputRegisterAt(invoke, 1); + vixl32::Register srcEnd = InputRegisterAt(invoke, 2); + vixl32::Register dstObj = InputRegisterAt(invoke, 3); + vixl32::Register dstBegin = InputRegisterAt(invoke, 4); + + vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1)); + vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2)); + + vixl32::Label done, compressed_string_loop; + // dst to be copied. + __ Add(dst_ptr, dstObj, data_offset); + __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1)); + + __ Subs(num_chr, srcEnd, srcBegin); + // Early out for valid zero-length retrievals. + __ B(eq, &done); + + // src range to copy. + __ Add(src_ptr, srcObj, value_offset); + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp; + vixl32::Label compressed_string_preloop; + if (mirror::kUseStringCompression) { + // Location of count in string. 
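Before the compressed-string handling below, the overall copy structure may be easier to see in plain C++: four 16-bit characters per iteration (the generated code uses two 32-bit LDR/STR pairs rather than LDRD/STRD, which fault on unaligned addresses), followed by a one-character remainder loop. A sketch of the uncompressed case, illustrative only:

```cpp
#include <cstdint>

inline void CopyChars(const uint16_t* src, uint16_t* dst, int32_t num_chr) {
  // Bulk loop: four 16-bit characters per iteration.
  while (num_chr >= 4) {
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    src += 4;
    dst += 4;
    num_chr -= 4;
  }
  // Remainder loop: fewer than four characters left.
  while (num_chr > 0) {
    *dst++ = *src++;
    --num_chr;
  }
}
```

The compressed path that follows uses the same one-at-a-time shape, but loads a byte and stores a 16-bit character per iteration.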
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + temp = temps.Acquire(); + // String's length. + __ Ldr(temp, MemOperand(srcObj, count_offset)); + __ Cmp(temp, 0); + temps.Release(temp); + __ B(lt, &compressed_string_preloop); + } + __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); + + // Do the copy. + vixl32::Label loop, remainder; + + temp = temps.Acquire(); + // Save repairing the value of num_chr on the < 4 character path. + __ Subs(temp, num_chr, 4); + __ B(lt, &remainder); + + // Keep the result of the earlier subs, we are going to fetch at least 4 characters. + __ Mov(num_chr, temp); + + // Main loop used for longer fetches loads and stores 4x16-bit characters at a time. + // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code + // to rectify these everywhere this intrinsic applies.) + __ Bind(&loop); + __ Ldr(temp, MemOperand(src_ptr, char_size * 2)); + __ Subs(num_chr, num_chr, 4); + __ Str(temp, MemOperand(dst_ptr, char_size * 2)); + __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); + __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); + temps.Release(temp); + __ B(ge, &loop); + + __ Adds(num_chr, num_chr, 4); + __ B(eq, &done); + + // Main loop for < 4 character case and remainder handling. Loads and stores one + // 16-bit Java character at a time. + __ Bind(&remainder); + temp = temps.Acquire(); + __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex)); + __ Subs(num_chr, num_chr, 1); + __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); + temps.Release(temp); + __ B(gt, &remainder); + __ B(&done); + + if (mirror::kUseStringCompression) { + const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + DCHECK_EQ(c_char_size, 1u); + // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. + __ Bind(&compressed_string_preloop); + __ Add(src_ptr, src_ptr, srcBegin); + __ Bind(&compressed_string_loop); + temp = temps.Acquire(); + __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex)); + __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); + temps.Release(temp); + __ Subs(num_chr, num_chr, 1); + __ B(gt, &compressed_string_loop); + } + + __ Bind(&done); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { + ArmVIXLAssembler* const assembler = GetAssembler(); + const vixl32::Register out = OutputRegister(invoke); + // Shifting left by 1 bit makes the value encodable as an immediate operand; + // we don't care about the sign bit anyway. + constexpr uint32_t infinity = kPositiveInfinityFloat << 1U; + + __ Vmov(out, InputSRegisterAt(invoke, 0)); + // We don't care about the sign bit, so shift left. + __ Lsl(out, out, 1); + __ Eor(out, out, infinity); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. 
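The `Lsr(out, out, 5)` that follows completes the trick described in the comment above (CLZ yields 32 only when the XOR result is zero). The whole Float.isInfinite predicate, restated in plain C++ as a sketch:

```cpp
#include <cstdint>
#include <cstring>

// Sketch of the predicate computed by the LSL/EOR/CLZ/LSR sequence.
inline bool FloatIsInfinite(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  // 0x7f800000 is the bit pattern of +infinity. Shifting both sides left by
  // one drops the sign bit from the comparison (and, as noted above, makes
  // the constant encodable as an immediate operand).
  return (bits << 1) == (UINT32_C(0x7f800000) << 1);
}
```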
+ __ Lsr(out, out, 5); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { + ArmVIXLAssembler* const assembler = GetAssembler(); + const vixl32::Register out = OutputRegister(invoke); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + // The highest 32 bits of double precision positive infinity separated into + // two constants encodable as immediate operands. + constexpr uint32_t infinity_high = 0x7f000000U; + constexpr uint32_t infinity_high2 = 0x00f00000U; + + static_assert((infinity_high | infinity_high2) == + static_cast<uint32_t>(kPositiveInfinityDouble >> 32U), + "The constants do not add up to the high 32 bits of double " + "precision positive infinity."); + __ Vmov(temp, out, InputDRegisterAt(invoke, 0)); + __ Eor(out, out, infinity_high); + __ Eor(out, out, infinity_high2); + // We don't care about the sign bit, so shift left. + __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); +} + +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. +UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit) + +// 1.8. +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) + +UNREACHABLE_INTRINSICS(ARMVIXL) + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h new file mode 100644 index 0000000000..6e79cb76a1 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ + +#include "intrinsics.h" +#include "utils/arm/assembler_arm_vixl.h" + +namespace art { + +namespace arm { + +class ArmVIXLAssembler; +class CodeGeneratorARMVIXL; + +class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen); + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + ArmVIXLAssembler* assembler_; + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL); +}; + +class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArenaAllocator* GetAllocator(); + ArmVIXLAssembler* GetAssembler(); + + CodeGeneratorARMVIXL* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index aae3899847..43682c5633 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3200,7 +3200,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // for (size_t i = 0; i != length; ++i) { @@ -3222,14 +3222,13 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
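The constants the next hunk introduces split LockWord::kReadBarrierStateShift into a byte offset and a bit position so that a single `testb` against the lock word can check the gray bit; it is the byte-granular analogue of the ARM LSRS/carry test shown earlier. The arithmetic in plain C++, with the shift value again an assumed placeholder:

```cpp
#include <cstdint>

constexpr uint32_t kBitsPerByte = 8;
constexpr uint32_t kReadBarrierStateShift = 28;  // assumed placeholder value

// testb can only examine one byte, so the bit index is split into the byte
// that holds it and its position inside that byte.
constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;
constexpr uint8_t test_value = static_cast<uint8_t>(1u << gray_bit_position);

inline bool IsGray(const uint8_t* lock_word_bytes /* little-endian */) {
  return (lock_word_bytes[gray_byte_position] & test_value) != 0;
}
```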
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // goto slow_path; // At this point, just do the "if" and make sure that flags are preserved until the branch. __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index cdef22f6de..de2606c327 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1399,7 +1399,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // if (src_ptr != end_ptr) { // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { // // Slow-path copy. // do { @@ -1420,14 +1420,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); - // if (rb_state == ReadBarrier::gray_ptr_) + // if (rb_state == ReadBarrier::GrayState()) // goto slow_path; // At this point, just do the "if" and make sure that flags are preserved until the branch. 
__ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index b91e9e6868..5b2cbf783d 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -663,27 +663,59 @@ class LSEVisitor : public HGraphVisitor { if (predecessors.size() == 0) { return; } + ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { - HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i]; - heap_values[i] = pred0_value; - if (pred0_value != kUnknownHeapValue) { - for (size_t j = 1; j < predecessors.size(); j++) { - HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i]; - if (pred_value != pred0_value) { - heap_values[i] = kUnknownHeapValue; - break; - } + HInstruction* merged_value = nullptr; + // Whether merged_value is a result that's merged from all predecessors. + bool from_all_predecessors = true; + ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* singleton_ref = nullptr; + if (ref_info->IsSingletonAndNotReturned()) { + // We do more analysis of liveness when merging heap values for such + // cases since stores into such references may potentially be eliminated. + singleton_ref = ref_info->GetReference(); + } + + for (HBasicBlock* predecessor : predecessors) { + HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if ((singleton_ref != nullptr) && + !singleton_ref->GetBlock()->Dominates(predecessor)) { + // singleton_ref is not live in this predecessor. Skip this predecessor since + // it does not really have the location. + DCHECK_EQ(pred_value, kUnknownHeapValue); + from_all_predecessors = false; + continue; + } + if (merged_value == nullptr) { + // First seen heap value. + merged_value = pred_value; + } else if (pred_value != merged_value) { + // There are conflicting values. + merged_value = kUnknownHeapValue; + break; } } - if (heap_values[i] == kUnknownHeapValue) { + if (merged_value == kUnknownHeapValue) { + // There are conflicting heap values from different predecessors. // Keep the last store in each predecessor since future loads cannot be eliminated. - for (size_t j = 0; j < predecessors.size(); j++) { - ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()]; + for (HBasicBlock* predecessor : predecessors) { + ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; KeepIfIsStore(pred_values[i]); } } + + if ((merged_value == nullptr) || !from_all_predecessors) { + DCHECK(singleton_ref != nullptr); + DCHECK((singleton_ref->GetBlock() == block) || + !singleton_ref->GetBlock()->Dominates(block)); + // singleton_ref is not defined before block or defined only in some of its + // predecessors, so block doesn't really have the location at its entry. + heap_values[i] = kUnknownHeapValue; + } else { + heap_values[i] = merged_value; + } } } diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index d157509758..a9fe209063 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -16,11 +16,16 @@ #include "locations.h" +#include <type_traits> + #include "nodes.h" #include "code_generator.h" namespace art { +// Verify that Location is trivially copyable. 
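The static_assert that opens the next hunk only holds because the locations.h change further below replaces Location's hand-written copy constructor and copy assignment with defaulted ones; a user-provided copy constructor, even a field-for-field one, makes a type non-trivially-copyable. A minimal self-contained illustration:

```cpp
#include <cstdint>
#include <type_traits>

struct HandRolledCopy {
  uintptr_t value_;
  HandRolledCopy(const HandRolledCopy& other) : value_(other.value_) {}
};

struct DefaultedCopy {
  uintptr_t value_;
  DefaultedCopy(const DefaultedCopy& other) = default;
};

static_assert(!std::is_trivially_copyable<HandRolledCopy>::value,
              "a user-provided copy constructor is never trivial");
static_assert(std::is_trivially_copyable<DefaultedCopy>::value,
              "a defaulted copy constructor keeps the type trivially copyable");
```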
+static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable"); + LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind, bool intrinsified) diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index da27928ef2..52747c0cc4 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -91,12 +91,9 @@ class Location : public ValueObject { DCHECK(!IsValid()); } - Location(const Location& other) : value_(other.value_) {} + Location(const Location& other) = default; - Location& operator=(const Location& other) { - value_ = other.value_; - return *this; - } + Location& operator=(const Location& other) = default; bool IsConstant() const { return (value_ & kLocationConstantMask) == kConstant; @@ -328,7 +325,6 @@ class Location : public ValueObject { LOG(FATAL) << "Should not use this location kind"; } UNREACHABLE(); - return "?"; } // Unallocated locations. diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 51be1d1e91..f4616e39e6 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -28,6 +28,30 @@ static void RemoveFromCycle(HInstruction* instruction) { instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false); } +// Detect a goto block and sets succ to the single successor. +static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) { + if (block->GetPredecessors().size() == 1 && + block->GetSuccessors().size() == 1 && + block->IsSingleGoto()) { + *succ = block->GetSingleSuccessor(); + return true; + } + return false; +} + +// Detect an early exit loop. +static bool IsEarlyExit(HLoopInformation* loop_info) { + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) { + for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + return true; + } + } + } + return false; +} + // // Class methods. // @@ -168,7 +192,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { int32_t use_count = 0; if (IsPhiInduction(phi) && IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) && - TryReplaceWithLastValue(phi, use_count, preheader)) { + // No uses, or no early-exit with proper replacement. + (use_count == 0 || + (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) { for (HInstruction* i : *iset_) { RemoveFromCycle(i); } @@ -178,31 +204,57 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { } void HLoopOptimization::SimplifyBlocks(LoopNode* node) { - for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - // Remove instructions that are dead. - for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) { - HInstruction* instruction = i.Current(); - if (instruction->IsDeadAndRemovable()) { - block->RemoveInstruction(instruction); + // Repeat the block simplifications until no more changes occur. Note that since + // each simplification consists of eliminating code (without introducing new code), + // this process is always finite. + bool changed; + do { + changed = false; + // Iterate over all basic blocks in the loop-body. + for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + // Remove dead instructions from the loop-body. 
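Looking back at the SimplifyInduction change above: a phi with uses after the loop is now replaced only when the loop has no early exit, presumably because an early exit makes the value observed after the loop data-dependent rather than a closed-form "last value". A hypothetical source-level illustration of the two loop shapes, written as C++ for illustration only:

```cpp
int LastValueIsKnown(int n) {
  int i = 0;
  for (; i < n; ++i) {
    // Empty body: after the loop i is simply max(n, 0), so the use below can
    // be rewritten to that last value and the induction phi removed.
  }
  return i;
}

int LastValueIsNotKnown(int n, const int* data) {
  int i = 0;
  for (; i < n; ++i) {
    if (data[i] == 0) {
      break;  // Early exit: the value of i after the loop depends on the data,
              // so there is no closed-form last value to substitute.
    }
  }
  return i;
}
```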
+ for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) { + HInstruction* instruction = i.Current(); + if (instruction->IsDeadAndRemovable()) { + changed = true; + block->RemoveInstruction(instruction); + } } - } - // Remove trivial control flow blocks from the loop-body. - if (block->GetPredecessors().size() == 1 && - block->GetSuccessors().size() == 1 && - block->GetFirstInstruction()->IsGoto()) { - HBasicBlock* pred = block->GetSinglePredecessor(); - HBasicBlock* succ = block->GetSingleSuccessor(); - if (succ->GetPredecessors().size() == 1) { + // Remove trivial control flow blocks from the loop-body. + HBasicBlock* succ = nullptr; + if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) { + // Trivial goto block can be removed. + HBasicBlock* pred = block->GetSinglePredecessor(); + changed = true; pred->ReplaceSuccessor(block, succ); - block->ClearDominanceInformation(); - block->SetDominator(pred); // needed by next disconnect. + block->RemoveDominatedBlock(succ); block->DisconnectAndDelete(); pred->AddDominatedBlock(succ); succ->SetDominator(pred); + } else if (block->GetSuccessors().size() == 2) { + // Trivial if block can be bypassed to either branch. + HBasicBlock* succ0 = block->GetSuccessors()[0]; + HBasicBlock* succ1 = block->GetSuccessors()[1]; + HBasicBlock* meet0 = nullptr; + HBasicBlock* meet1 = nullptr; + if (succ0 != succ1 && + IsGotoBlock(succ0, &meet0) && + IsGotoBlock(succ1, &meet1) && + meet0 == meet1 && // meets again + meet0 != block && // no self-loop + meet0->GetPhis().IsEmpty()) { // not used for merging + changed = true; + succ0->DisconnectAndDelete(); + if (block->Dominates(meet0)) { + block->RemoveDominatedBlock(meet0); + succ1->AddDominatedBlock(meet0); + meet0->SetDominator(succ1); + } + } } } - } + } while (changed); } void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { @@ -240,12 +292,12 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { if (IsEmptyHeader(header) && IsEmptyBody(body) && IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) && - TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) { + // No uses, or proper replacement. + (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) { body->DisconnectAndDelete(); exit->RemovePredecessor(header); header->RemoveSuccessor(exit); - header->ClearDominanceInformation(); - header->SetDominator(preheader); // needed by next disconnect. + header->RemoveDominatedBlock(exit); header->DisconnectAndDelete(); preheader->AddSuccessor(exit); preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator @@ -259,22 +311,23 @@ void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) { bool HLoopOptimization::IsPhiInduction(HPhi* phi) { ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { + DCHECK(iset_->empty()); for (HInstruction* i : *set) { - // Check that, other than phi, instruction are removable with uses contained in the cycle. - // TODO: investigate what cases are no longer in the graph. - if (i != phi) { - if (!i->IsInBlock() || !i->IsRemovable()) { - return false; - } + // Check that, other than instructions that are no longer in the graph (removed earlier) + // each instruction is removable and, other than the phi, uses are contained in the cycle. 
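Both the trivial-goto-block removal and the empty-inner-loop removal above replace the old ClearDominanceInformation()/SetDominator() pair with explicit dominator-tree surgery. A simplified stand-in for that bookkeeping, using plain structs rather than ART's HBasicBlock API:

```cpp
#include <algorithm>
#include <vector>

struct Block {
  std::vector<Block*> successors;
  std::vector<Block*> dominated;
  Block* dominator = nullptr;
};

// pred -> block -> succ becomes pred -> succ, and succ is re-parented to pred
// in the dominator tree (mirroring ReplaceSuccessor / RemoveDominatedBlock /
// AddDominatedBlock / SetDominator above).
void BypassTrivialGotoBlock(Block* pred, Block* block, Block* succ) {
  std::replace(pred->successors.begin(), pred->successors.end(), block, succ);
  auto& dominated = block->dominated;
  dominated.erase(std::remove(dominated.begin(), dominated.end(), succ), dominated.end());
  pred->dominated.push_back(succ);
  succ->dominator = pred;
}
```

The real code additionally calls DisconnectAndDelete() on the removed block; the sketch only shows the successor and dominator updates.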
+ if (!i->IsInBlock()) { + continue; + } else if (!i->IsRemovable()) { + return false; + } else if (i != phi) { for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; } } } + iset_->insert(i); // copy } - DCHECK(iset_->empty()); - iset_->insert(set->begin(), set->end()); // copy return true; } return false; @@ -358,20 +411,16 @@ void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* } } -bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, - int32_t use_count, - HBasicBlock* block) { - // If true uses appear after the loop, replace these uses with the last value. Environment - // uses can consume this value too, since any first true use is outside the loop (although - // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only - // environment uses, the value is dropped altogether, since the computations have no effect. - if (use_count > 0) { - if (!induction_range_.CanGenerateLastValue(instruction)) { - return false; - } +bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) { + // Try to replace outside uses with the last value. Environment uses can consume this + // value too, since any first true use is outside the loop (although this may imply + // that de-opting may look "ahead" a bit on the phi value). If there are only environment + // uses, the value is dropped altogether, since the computations have no effect. + if (induction_range_.CanGenerateLastValue(instruction)) { ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block)); + return true; } - return true; + return false; } } // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index e18d17531e..3391bef4e9 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -72,9 +72,7 @@ class HLoopOptimization : public HOptimization { HInstruction* instruction, /*out*/ int32_t* use_count); void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement); - bool TryReplaceWithLastValue(HInstruction* instruction, - int32_t use_count, - HBasicBlock* block); + bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); // Range information based on prior induction variable analysis. InductionVarRange induction_range_; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index caf66474eb..5991791a15 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -374,7 +374,9 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { if (current->GetType() == Primitive::kPrimNot) { DCHECK(interval->GetDefinedBy()->IsActualObject()) << interval->GetDefinedBy()->DebugName() - << "@" << safepoint_position->GetInstruction()->DebugName(); + << '(' << interval->GetDefinedBy()->GetId() << ')' + << "@" << safepoint_position->GetInstruction()->DebugName() + << '(' << safepoint_position->GetInstruction()->GetId() << ')'; LocationSummary* locations = safepoint_position->GetLocations(); if (current->GetParent()->HasSpillSlot()) { locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); |