Diffstat (limited to 'compiler/optimizing')
20 files changed, 1737 insertions, 916 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5152075499..c532e72465 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1228,7 +1228,8 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in instruction->IsLoadString() || instruction->IsInstanceOf() || instruction->IsCheckCast() || - (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())) + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) || + (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified())) << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 4c4128c5f8..6d9c55cd75 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -429,7 +429,8 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -441,6 +442,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { DCHECK_NE(reg, SP); DCHECK_NE(reg, LR); DCHECK_NE(reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(reg, IP); DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; // "Compact" slow path, saving two moves. // @@ -5585,55 +5589,15 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives - // a 128B range. To try and reduce the number of literals if we load multiple strings, - // simply split the dex cache address to a 128B aligned base loaded from a literal - // and the remaining offset embedded in the load. 
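A small standalone sketch of the address split described in the removed comment above may help; the helper and struct names are illustrative, not ART APIs, and only the masking arithmetic mirrors the removed MaxInt-based code:

    #include <cstdint>

    // Split a 4-byte-aligned dex cache address into a 128-byte-aligned base
    // (loaded from a literal) and a small offset that still fits the 16-bit
    // Thumb LDR encoding: 5 encoded offset bits scaled by the 4-byte element
    // size give the 128B range mentioned above.
    struct SplitAddress { uint32_t base_address; uint32_t offset; };

    inline SplitAddress SplitForThumbLdr(uint32_t address) {
      constexpr uint32_t kOffsetBits = /* encoded bits */ 5 + /* scale */ 2;
      constexpr uint32_t kOffsetMask = (1u << kOffsetBits) - 1u;  // 0x7f, i.e. MaxInt<uint32_t>(7)
      return SplitAddress{address & ~kOffsetMask, address & kOffsetMask};
    }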
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2; - uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits); - uint32_t offset = address & MaxInt<uint32_t>(offset_bits); - __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase(); - int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset(); - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static int32_t GetExceptionTlsOffset() { @@ -6413,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. + // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root); codegen_->AddSlowPath(slow_path); @@ -6522,7 +6486,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Introduce a dependency on the lock_word including the rb_state, // which shall prevent load-load reordering without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp_reg. + // `obj` is unchanged by this operation, but its value now depends + // on `temp_reg`. __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32)); // The actual reference load. @@ -6553,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. + // Slow path marking the object `ref` when it is gray. 
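The `add(obj, obj, ShifterOperand(temp_reg, LSR, 32))` above deserves a gloss: on ARM, an LSR by 32 of a 32-bit register yields zero, so the add leaves `obj` unchanged while making the following reference load address-dependent on the lock word load. A scalar sketch of that identity (illustrative only; the widening cast stands in for the hardware shift semantics, since shifting a 32-bit C++ value by 32 is undefined behavior):

    #include <cstdint>

    // obj + (lock_word >> 32) == obj for any 32-bit lock_word, but the result
    // now carries a data dependency on lock_word, which is what forbids
    // load-load reordering without an explicit memory barrier.
    inline uint32_t AddLockWordDependency(uint32_t obj, uint32_t lock_word) {
      return obj + static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
    }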
SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d95e7df6b4..cc8985d0b0 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -595,7 +595,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -607,7 +608,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { DCHECK_NE(obj_.reg(), LR); DCHECK_NE(obj_.reg(), WSP); DCHECK_NE(obj_.reg(), WZR); - // WIP0 is used by the slow path as a temp, it can not be the object register. + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. DCHECK_NE(obj_.reg(), IP0); DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); // "Compact" slow path, saving two moves. @@ -4195,7 +4197,6 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { - Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); switch (load->GetLoadKind()) { @@ -4231,63 +4232,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads - // that gives a 16KiB range. To try and reduce the number of literals if we load - // multiple strings, simply split the dex cache address to a 16KiB aligned base - // loaded from a literal and the remaining offset embedded in the load. - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2; - uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits); - uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits); - __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out.X(), offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - // Add ADRP with its PC-relative DexCache access patch. - const DexFile& dex_file = load->GetDexFile(); - uint32_t element_offset = load->GetDexCacheElementOffset(); - vixl::aarch64::Label* adrp_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } - // Add LDR with its PC-relative DexCache access patch. 
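For readers unfamiliar with the ADRP/LDR pairing being removed here: ADRP materializes the 4KiB page of a PC-relative target, and the 12-bit LDR immediate supplies the offset within that page, with both fields filled in later by the linker patches recorded above. A hedged sketch of the address arithmetic (function names are illustrative, not ART or VIXL APIs):

    #include <cstdint>

    // ADRP: PC-relative page address. `page_delta` is the signed immediate
    // the patch fills in, counted in 4KiB pages.
    inline uint64_t AdrpTarget(uint64_t pc, int64_t page_delta) {
      return (pc & ~UINT64_C(0xfff)) + (static_cast<uint64_t>(page_delta) << 12);
    }

    // LDR (unsigned offset): the low 12 bits of the target address, patched
    // into the load that follows the ADRP.
    inline uint64_t LdrAddress(uint64_t page_base, uint32_t lo12) {
      return page_base + lo12;
    }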
- vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = InputRegisterAt(load, 0); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -5088,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. + // Slow path marking the GC root `root`. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); codegen_->AddSlowPath(slow_path); @@ -5239,7 +5192,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp. + // `obj` is unchanged by this operation, but its value now depends + // on `temp`. __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32)); // The actual reference load. @@ -5285,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Object* ref = ref_addr->AsMirrorPtr() GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. + // Slow path marking the object `ref` when it is gray. 
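The removed kDexCacheViaMethod path above amounted to a three-load pointer chase. A minimal sketch of its shape (the struct layouts are stand-ins for ART's real object layouts, which use GcRoot and compressed references):

    #include <cstdint>

    struct StringRef { void* ptr; };                 // stands in for GcRoot<mirror::String>
    struct Class { StringRef* dex_cache_strings_; }; // stands in for mirror::Class
    struct Method { Class* declaring_class_; };      // stands in for ArtMethod

    inline StringRef LoadStringViaMethod(const Method* current_method, uint32_t string_index) {
      Class* klass = current_method->declaring_class_;  // GC root load 1
      StringRef* strings = klass->dex_cache_strings_;   // heap load 2
      return strings[string_index];                     // GC root load 3
    }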
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 58879bc2f1..8a2f90d541 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1833,11 +1833,19 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { } } +auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) { + auto null_checker = [this, instruction]() { + this->codegen_->MaybeRecordImplicitNullCheck(instruction); + }; + return null_checker; +} + void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + auto null_checker = GetImplicitNullChecker(instruction); Primitive::Type type = instruction->GetType(); switch (type) { @@ -1846,10 +1854,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1859,10 +1867,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1872,11 +1880,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1886,11 +1894,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1902,11 +1910,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = 
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); } break; } @@ -1916,11 +1924,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset); + __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } break; } @@ -1930,11 +1938,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadSFromOffset(out, TMP, data_offset); + __ LoadSFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1944,11 +1952,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(out, obj, offset); + __ LoadDFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadDFromOffset(out, TMP, data_offset); + __ LoadDFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1957,7 +1965,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -2004,6 +2011,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + auto null_checker = GetImplicitNullChecker(instruction); switch (value_type) { case Primitive::kPrimBoolean: @@ -2013,10 +2021,10 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ StoreToOffset(kStoreByte, value, TMP, data_offset); + __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker); } break; } @@ -2028,11 +2036,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker); } 
else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreHalfword, value, TMP, data_offset); + __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker); } break; } @@ -2045,14 +2053,13 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, value, obj, offset); + __ StoreToOffset(kStoreWord, value, obj, offset, null_checker); } else { DCHECK(index.IsRegister()) << index; __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreWord, value, TMP, data_offset); + __ StoreToOffset(kStoreWord, value, TMP, data_offset, null_checker); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); codegen_->MarkGCCard(obj, value); @@ -2075,11 +2082,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreDoubleword, value, obj, offset); + __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset); + __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); } break; } @@ -2091,11 +2098,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ StoreSToOffset(value, TMP, data_offset); + __ StoreSToOffset(value, TMP, data_offset, null_checker); } break; } @@ -2107,11 +2114,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreDToOffset(value, obj, offset); + __ StoreDToOffset(value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ StoreDToOffset(value, TMP, data_offset); + __ StoreDToOffset(value, TMP, data_offset, null_checker); } break; } @@ -2120,11 +2127,6 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - // Ints and objects are handled in the switch. 
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3589,6 +3591,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3654,34 +3657,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - if (obj == dst) { - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst, obj, offset); - } else { - __ LoadFromOffset(kLoadWord, dst, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(load_type, dst, obj, offset); } + __ LoadFromOffset(load_type, dst, obj, offset, null_checker); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, offset); + __ LoadSFromOffset(dst, obj, offset, null_checker); } else { - __ LoadDFromOffset(dst, obj, offset); + __ LoadDFromOffset(dst, obj, offset, null_checker); } } - // Longs are handled earlier. - if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } if (is_volatile) { @@ -3729,6 +3718,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3800,28 +3790,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); - Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); - __ StoreToOffset(kStoreWord, src, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(store_type, src, obj, offset); } + __ StoreToOffset(store_type, src, obj, offset, null_checker); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, offset); + __ StoreSToOffset(src, obj, offset, null_checker); } else { - __ StoreDToOffset(src, obj, offset); + __ StoreDToOffset(src, obj, offset, null_checker); } } - // Longs are handled earlier. - if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } // TODO: memory barriers? 
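The GetImplicitNullChecker helper introduced in this file threads a callback into the assembler so the implicit null check is recorded against the exact instruction that can fault, rather than after a multi-instruction sequence — which is what made the old long/FP special cases above necessary. A stripped-down sketch of the pattern (the types are stand-ins, not the ART assembler API):

    #include <functional>
    #include <iostream>

    // Stand-in for the code generator; only the callback pattern is the point.
    struct CodeGen {
      void MaybeRecordImplicitNullCheck(int instruction_id) {
        std::cout << "null check recorded at instruction " << instruction_id << "\n";
      }
    };

    // The assembler helper invokes `null_checker` immediately after emitting
    // the single instruction that may fault, so the recorded PC is exact.
    void LoadFromOffset(const std::function<void()>& null_checker) {
      // ... emit address computation, then the actual load ...
      null_checker();
    }

    void EmitArrayGet(CodeGen* codegen, int instruction_id) {
      auto null_checker = [codegen, instruction_id]() {
        codegen->MaybeRecordImplicitNullCheck(instruction_id);
      };
      LoadFromOffset(null_checker);
    }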
@@ -4580,11 +4562,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; - // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: - case HLoadString::LoadKind::kDexCacheViaMethod: - base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); - break; default: base_or_current_method_reg = ZERO; break; @@ -4628,52 +4605,15 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - int16_t offset = Low16Bits(address); - uint32_t base_address = address - offset; // This accounts for offset sign extension. - __ Lui(out, High16Bits(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase(); - int32_t offset = - load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad(load, - out_loc, - base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad(load, - out_loc, - out, - CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 63a0345c1c..46810d658f 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -257,6 +257,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + auto GetImplicitNullChecker(HInstruction* instruction); MipsAssembler* const assembler_; CodeGeneratorMIPS* const codegen_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 4e7a2728b1..4a5755c925 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3261,22 +3261,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { } void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = load->GetLocations(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadUnsignedWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset( - kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. - - if (!load->IsInDexCache()) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Bc(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 7a561bb4ad..f50eb5cb7e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode { // Slow path marking an object during a read barrier. 
class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison) + : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { DCHECK(kEmitCompilerReadBarrier); } @@ -464,11 +464,16 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(reg); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. @@ -498,6 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { private: const Location obj_; + const bool unpoison_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; @@ -1578,15 +1584,15 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) { locations->SetOut(Location::SameAsFirstInput()); } -void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { +void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { Register lhs_reg = lhs.AsRegister<Register>(); if (rhs.IsConstant()) { int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs_reg, value); + Compare32BitValue(lhs_reg, value); } else if (rhs.IsStackSlot()) { - __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); + assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); } else { - __ cmpl(lhs_reg, rhs.AsRegister<Register>()); + assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>()); } } @@ -1619,7 +1625,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong); DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); LocationSummary* cond_locations = condition->GetLocations(); - GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); + codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); cond = X86Condition(condition->GetCondition()); } } else { @@ -1728,7 +1734,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { // Clear output register: setb only sets the low byte. __ xorl(reg, reg); - GenerateIntCompare(lhs, rhs); + codegen_->GenerateIntCompare(lhs, rhs); __ setb(X86Condition(cond->GetCondition()), reg); return; } @@ -4210,7 +4216,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { - GenerateIntCompare(left, right); + codegen_->GenerateIntCompare(left, right); break; } case Primitive::kPrimLong: { @@ -4630,10 +4636,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI // load the temp into the XMM and then copy the XMM into the // output, 32 bits at a time). 
locations->AddTemp(Location::RequiresFpuRegister()); - } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); } } @@ -4677,11 +4679,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5092,11 +5093,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -5171,11 +5167,10 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -6230,48 +6225,15 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. 
} - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address)); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); - // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */ - GenerateGcRootFieldLoad( - load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { @@ -6313,8 +6275,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -6375,7 +6337,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6597,7 +6559,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -6633,8 +6595,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6673,8 +6634,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6706,8 +6666,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6715,8 +6674,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6907,17 +6865,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* Location maybe_temp) { Register out_reg = out.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. + DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<Register>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6934,17 +6892,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6987,9 +6943,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root); + // Slow path marking the GC root `root`. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, root, /* unpoison */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()), @@ -7023,14 +6979,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -7038,7 +6993,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7051,14 +7005,13 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Address src = index.IsConstant() ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7088,17 +7041,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // performance reasons. Register ref_reg = ref.AsRegister<Register>(); - Register temp_reg = temp.AsRegister<Register>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86 memory model. @@ -7106,25 +7065,20 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison */ true); + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with SHR. 
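+ // Worked instance of the byte test computed above, assuming (purely for
+ // illustration) LockWord::kReadBarrierStateShift == 28 and kBitsPerByte == 8:
+ //   gray_byte_position = 28 / 8 = 3   -> test byte 3 of the lock word
+ //   gray_bit_position  = 28 % 8 = 4   -> bit 4 within that byte
+ //   test_value         = 1 << 4 = 0x10
+ // The emitted instruction is then `testb [obj + monitor_offset + 3], 0x10`,
+ // which sets ZF iff the rb_state gray bit is clear, replacing the old
+ // SHR-into-carry sequence below without needing the temp register.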
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); - __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f306b33247..c644e401ff 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -254,8 +254,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address @@ -295,7 +294,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { HBasicBlock* default_block); void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); - void GenerateIntCompare(Location lhs, Location rhs); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -431,6 +429,8 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); + void GenerateIntCompare(Location lhs, Location rhs); + void GenerateMemoryBarrier(MemBarrierKind kind); Label* GetLabelOf(HBasicBlock* block) const { @@ -486,7 +486,6 @@ class CodeGeneratorX86 : public CodeGenerator { Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. @@ -495,7 +494,6 @@ class CodeGeneratorX86 : public CodeGenerator { Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. @@ -503,7 +501,6 @@ class CodeGeneratorX86 : public CodeGenerator { Location ref, Register obj, const Address& src, - Location temp, bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cf01a791ee..ec37e5db22 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { // Slow path marking an object during a read barrier. 
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison) + : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { DCHECK(kEmitCompilerReadBarrier); } @@ -485,11 +485,16 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>()); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. @@ -519,6 +524,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { private: const Location obj_; + const bool unpoison_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; @@ -4151,11 +4157,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { Location::RequiresRegister(), object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, @@ -4199,11 +4200,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -4587,11 +4587,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -4666,11 +4661,10 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { @@ -5635,53 +5629,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // /* GcRoot<mirror::String> */ out = *address - if (IsUint<32>(load->GetAddress())) { - Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true); - GenerateGcRootFieldLoad(load, out_loc, address); - } else { - // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). - __ movq(out, Immediate(load->GetAddress())); - GenerateGcRootFieldLoad(load, out_loc, Address(out, 0)); - } - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); - Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); - // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { @@ -5724,8 +5680,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -5786,7 +5742,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6016,8 +5972,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); @@ -6041,8 +5996,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. @@ -6062,8 +6016,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6087,8 +6040,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Walk over the class hierarchy to find a match. NearLabel loop; @@ -6114,8 +6066,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6134,8 +6085,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Do an exact check. NearLabel check_non_primitive_component_type; @@ -6163,8 +6113,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6172,8 +6121,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6189,8 +6137,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // We always go into the type check slow path for the unresolved // and interface check cases. @@ -6358,17 +6305,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi Location maybe_temp) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. 
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6385,17 +6332,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6438,9 +6383,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root); + // Slow path marking the GC root `root`. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, root, /* unpoison */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(), @@ -6475,14 +6420,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6490,7 +6434,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6503,14 +6446,13 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in Address src = index.IsConstant() ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6540,17 +6482,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // performance reasons. 
CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); - CpuRegister temp_reg = temp.AsRegister<CpuRegister>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86-64 memory model. @@ -6558,25 +6506,20 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison */ true); + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with SHR. 
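To make the byte-wide test above concrete: assuming LockWord::kReadBarrierStateShift is 28 (its value in this vintage of the runtime; the constant below is an assumption of this sketch, not taken from the diff), the gray bit lands at bit 4 of byte 3 of the 32-bit lock word, which is exactly the byte and bit the testb probes; the flags it sets must then survive untouched until the j(kNotZero, ...) branch, which is why the unpoisoning (which modifies the flags, as noted above) is pushed into the slow path via the new `unpoison` flag:

  #include <cassert>
  #include <cstdint>

  int main() {
    constexpr uint32_t kReadBarrierStateShift = 28;  // assumed LockWord value
    constexpr uint32_t kBitsPerByte = 8;

    // Same arithmetic as the generator code above.
    constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // 3
    constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // 4
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);     // 0x10

    // rb_state == gray (1) sets bit 28 of the lock word; the byte-wide test of
    // byte 3 against 0x10 is therefore "not zero" exactly for gray objects.
    uint32_t gray_lock_word = 1u << kReadBarrierStateShift;
    assert(((gray_lock_word >> (gray_byte_position * kBitsPerByte)) & test_value) != 0);
    uint32_t white_lock_word = 0u;
    assert(((white_lock_word >> (gray_byte_position * kBitsPerByte)) & test_value) == 0);
    return 0;
  }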
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); - __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 4e0e34ce38..44844ac67a 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -248,8 +248,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address @@ -427,7 +426,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. @@ -436,7 +434,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. @@ -444,7 +441,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location ref, CpuRegister obj, const Address& src, - Location temp, bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 18db507c48..fe6c0a305e 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -29,12 +29,6 @@ #include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" -#include "code_generator_arm.h" -#include "code_generator_arm64.h" -#include "code_generator_mips.h" -#include "code_generator_mips64.h" -#include "code_generator_x86.h" -#include "code_generator_x86_64.h" #include "code_simulator_container.h" #include "common_compiler_test.h" #include "dex_file.h" @@ -52,10 +46,35 @@ #include "utils/mips64/managed_register_mips64.h" #include "utils/x86/managed_register_x86.h" +#ifdef ART_ENABLE_CODEGEN_arm +#include "code_generator_arm.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "code_generator_arm64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +#include "code_generator_x86.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "code_generator_x86_64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +#include "code_generator_mips.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +#include "code_generator_mips64.h" +#endif + #include "gtest/gtest.h" namespace art { +#ifdef ART_ENABLE_CODEGEN_arm // Provide our own codegen, that ensures the C calling conventions // are preserved. Currently, ART and C do not match as R4 is caller-save // in ART, and callee-save in C. 
Alternatively, we could use or write @@ -80,7 +99,9 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { blocked_register_pairs_[arm::R6_R7] = false; } }; +#endif +#ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: TestCodeGeneratorX86(HGraph* graph, @@ -105,6 +126,7 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { blocked_register_pairs_[x86::ECX_EDI] = false; } }; +#endif class InternalCodeAllocator : public CodeAllocator { public: @@ -234,37 +256,54 @@ static void RunCode(InstructionSet target_isa, bool has_result, Expected expected) { CompilerOptions compiler_options; +#ifdef ART_ENABLE_CODEGEN_arm if (target_isa == kArm || target_isa == kThumb2) { std::unique_ptr<const ArmInstructionSetFeatures> features_arm( ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kArm64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + if (target_isa == kArm64) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86_64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_mips + if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 + if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } +#endif } static ::std::vector<InstructionSet> GetTargetISAs() { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 89d80cc281..b3d5341de0 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler { new DisassemblerOptions(/* absolute_addresses */ false, base_address, end_address, - /* can_read_literals */ true))); + /* can_read_literals */ true, + Is64BitInstructionSet(instruction_set) + ? 
&Thread::DumpThreadOffset<PointerSize::k64> + : &Thread::DumpThreadOffset<PointerSize::k32>))); } ~HGraphVisualizerDisassembler() { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 27d9d48560..0bbc0e54bc 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -41,6 +41,92 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>(); + Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>(); + Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>(); + Register tmp = locations->GetTemp(3).AsRegister<Register>(); + + __ Bind(GetEntryLabel()); + // Compute the base destination address in `dst_curr_addr`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(dst_curr_addr, dest, element_size * constant + offset); + } else { + __ add(dst_curr_addr, + dest, + ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(dst_curr_addr, offset); + } + + Label loop; + __ Bind(&loop); + __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex)); + __ MaybeUnpoisonHeapReference(tmp); + // TODO: Inline the mark bit check before calling the runtime? + // tmp = ReadBarrier::Mark(tmp); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more + // explanations.) + DCHECK_NE(tmp, SP); + DCHECK_NE(tmp, LR); + DCHECK_NE(tmp, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. 
+ DCHECK_NE(src_curr_addr, IP); + DCHECK_NE(dst_curr_addr, IP); + DCHECK_NE(src_stop_addr, IP); + DCHECK_NE(tmp, IP); + DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(tmp); + __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex)); + __ cmp(src_curr_addr, ShifterOperand(src_stop_addr)); + __ b(&loop, NE); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM); +}; + +#undef __ + bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); @@ -1337,9 +1423,9 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) } void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1362,6 +1448,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM (because that register + // is clobbered by ReadBarrierMarkRegX entry points). Get an extra + // temporary register from the register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } static void CheckPosition(ArmAssembler* assembler, @@ -1427,9 +1520,9 @@ static void CheckPosition(ArmAssembler* assembler, } void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1438,18 +1531,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); + Location temp3_loc = locations->GetTemp(2); + Register temp3 = temp3_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1465,7 +1562,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmp(src, ShifterOperand(dest)); - __ b(slow_path->GetEntryLabel(), EQ); + __ b(intrinsic_slow_path->GetEntryLabel(), EQ); } // Checked when building locations. @@ -1477,7 +1574,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { __ b(&conditions_on_positions_validated, NE); } __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant)); - __ b(slow_path->GetEntryLabel(), GT); + __ b(intrinsic_slow_path->GetEntryLabel(), GT); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1490,19 +1587,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { } else { __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>())); } - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel()); } // If the length is negative, bail out. 
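The loop that ReadBarrierSystemArrayCopySlowPathARM (defined above) emits has the following C-level shape; a sketch only, where Mark() stands in for the ReadBarrierMarkRegX entry point and the poison helpers are identity functions when heap poisoning is disabled:

  #include <cstdint>

  using HeapRef = uint32_t;  // models a (possibly poisoned) 32-bit heap reference

  inline HeapRef MaybeUnpoison(HeapRef r) { return r; }  // identity with poisoning off
  inline HeapRef MaybePoison(HeapRef r) { return r; }    // identity with poisoning off
  inline HeapRef Mark(HeapRef r) { return r; }  // real entry point may return a to-space pointer

  // Mirrors the ldr/Mark/str post-indexed loop in EmitNativeCode above.
  void SlowPathCopy(HeapRef* src_curr, HeapRef* src_stop, HeapRef* dst_curr) {
    do {
      HeapRef tmp = MaybeUnpoison(*src_curr++);  // ldr + MaybeUnpoisonHeapReference
      tmp = Mark(tmp);                           // runtime call; no stack map required
      *dst_curr++ = MaybePoison(tmp);            // MaybePoisonHeapReference + str
    } while (src_curr != src_stop);
  }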
@@ -1511,7 +1608,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ cmp(length.AsRegister<Register>(), ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } // Validity checks: source. @@ -1519,7 +1616,7 @@ src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -1528,7 +1625,7 @@ dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -1537,112 +1634,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); - __ LoadFromOffset(kLoadWord, temp2, src, class_offset); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - __ MaybeUnpoisonHeapReference(temp1); - __ MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such as `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel()); + } - __ cmp(temp1, ShifterOperand(temp2)); + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ LoadFromOffset(kLoadWord, temp2, src, class_offset); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1); + __ MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } - __ cmp(temp1, ShifterOperand(temp2)); + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - Label do_copy; - __ b(&do_copy, EQ); - if (!did_unpoison) { + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // No need to unpoison the result, we're comparing against null. + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to unpoison the result, we're comparing against null. - __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ b(slow_path->GetEntryLabel(), NE); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp1 = src->klass_ - __ LoadFromOffset(kLoadWord, temp1, src, class_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp3` has been unpoisoned + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ LoadFromOffset(kLoadWord, temp1, src, class_offset); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + } + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + // Compute the base source address in `temp1`. if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp1, src, element_size * constant + offset); } else { - __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2)); + __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift)); __ AddConstant(temp1, offset); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp2, dest, element_size * constant + offset); - } else { - __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2)); - __ AddConstant(temp2, offset); - } - + // Compute the end source address in `temp3`. if (length.IsConstant()) { int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp3, temp1, element_size * constant); } else { - __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2)); + __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison.
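The base and end addresses computed above are plain scaled pointer arithmetic: for kPrimNot, element_size is 4 (ART heap references are 32-bit) and element_size_shift is 2, so the shift form and the multiply form agree. A self-checking sketch, with an illustrative data offset standing in for mirror::Array::DataOffset():

  #include <cassert>
  #include <cstdint>

  int main() {
    constexpr int32_t element_size = 4;         // Primitive::ComponentSize(kPrimNot)
    constexpr uint32_t element_size_shift = 2;  // Primitive::ComponentSizeShift(kPrimNot)
    constexpr uint32_t data_offset = 12;        // illustrative stand-in for DataOffset()

    uint32_t src = 0x1000;                      // illustrative array address
    uint32_t src_pos = 3, length = 5;

    // temp1 = src + (src_pos << shift) + data_offset   (base source address)
    uint32_t temp1 = src + (src_pos << element_size_shift) + data_offset;
    // temp3 = temp1 + (length << shift)                 (end source address)
    uint32_t temp3 = temp1 + (length << element_size_shift);

    assert(temp1 == src + src_pos * element_size + data_offset);
    assert(temp3 - temp1 == length * element_size);
    return 0;
  }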
- Label loop, done; - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&done, EQ); - __ Bind(&loop); - __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); - __ str(IP, Address(temp2, element_size, Address::PostIndex)); - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&loop, NE); - __ Bind(&done); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + + // /* int32_t */ monitor = src->monitor_ + __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ add(src, src, ShifterOperand(temp2, LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ b(read_barrier_slow_path->GetEntryLabel(), CS); + + // Fast-path copy. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code.
+ + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + Label loop, done; + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -1651,7 +1923,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 9cfe3ce569..91374b3108 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64); }; +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) + : SlowPathCodeARM64(instruction), tmp_(tmp) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); + Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); + Register src_stop_addr = XRegisterFrom(locations->GetTemp(2)); + Register tmp_reg = WRegisterFrom(tmp_); + + __ Bind(GetEntryLabel()); + vixl::aarch64::Label slow_copy_loop; + __ Bind(&slow_copy_loop); + __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex)); + codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg); + // TODO: Inline the mark bit check before calling the runtime? + // tmp_reg = ReadBarrier::Mark(tmp_reg); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more + // explanations.) 
+ DCHECK_NE(tmp_.reg(), LR); + DCHECK_NE(tmp_.reg(), WSP); + DCHECK_NE(tmp_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); + DCHECK_NE(tmp_.reg(), IP0); + DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); + // This runtime call does not require a stack map. + codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); + __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&slow_copy_loop, ne); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } + + private: + Location tmp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64); +}; #undef __ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { @@ -2035,9 +2102,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; // We want to use two temporary registers in order to reduce the register pressure in arm64. // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2090,12 +2157,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP0, obtained from the VIXL scratch register + // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 + // (because that register is clobbered by ReadBarrierMarkRegX + // entry points). Get an extra temporary register from the + // register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2104,6 +2179,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = XRegisterFrom(locations->InAt(0)); Location src_pos = locations->InAt(1); @@ -2111,10 +2187,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); Register temp1 = WRegisterFrom(locations->GetTemp(0)); + Location temp1_loc = LocationFrom(temp1); Register temp2 = WRegisterFrom(locations->GetTemp(1)); + Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); vixl::aarch64::Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2130,7 +2208,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ Cmp(src, dest); - __ B(slow_path->GetEntryLabel(), eq); + __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. DCHECK(!optimizations.GetDestinationIsSource() @@ -2141,7 +2219,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(&conditions_on_positions_validated, ne); } __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); - __ B(slow_path->GetEntryLabel(), gt); + __ B(intrinsic_slow_path->GetEntryLabel(), gt); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2150,19 +2228,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); - __ B(slow_path->GetEntryLabel(), lt); + __ B(intrinsic_slow_path->GetEntryLabel(), lt); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ Cbz(src, slow_path->GetEntryLabel()); + __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ Cbz(dest, slow_path->GetEntryLabel()); + __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); } // We have already checked in the LocationsBuilder for the constant case. @@ -2170,17 +2248,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { // If the length is negative, bail out. - __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel()); + __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel()); // If the length >= 128 then (currently) prefer native implementation. 
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); - __ B(slow_path->GetEntryLabel(), ge); + __ B(intrinsic_slow_path->GetEntryLabel(), ge); } // Validity checks: source. CheckSystemArrayCopyPosition(masm, src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -2189,90 +2267,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(masm); + // Note: Because it is acquired from VIXL's scratch register pool, + // `temp3` might be IP0, and thus cannot be used as `ref` argument + // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier + // calls below (see ReadBarrierMarkSlowPathARM64 for more details). Register temp3 = temps.AcquireW(); + if (!optimizations.GetDoesNotNeedTypeCheck()) { // Check whether all elements of the source array are assignable to the component // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ Ldr(temp1, MemOperand(dest, class_offset)); - __ Ldr(temp2, MemOperand(src, class_offset)); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ Ldr(temp3, HeapOperand(temp1, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + src.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + temp1, + component_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ Ldrh(temp1, HeapOperand(temp1, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
- __ Cmp(temp1, temp2);
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, HeapOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - vixl::aarch64::Label do_copy; - __ B(&do_copy, eq); - if (!did_unpoison) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ Ldr(temp3, HeapOperand(temp2, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl::aarch64::Label do_copy; + __ B(&do_copy, eq); + if (!did_unpoison) { + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ Ldr(temp1, HeapOperand(temp1, component_offset)); codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Ldr(temp1, HeapOperand(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(intrinsic_slow_path->GetEntryLabel(), ne); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ Ldr(temp1, HeapOperand(temp1, component_offset)); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ Ldr(temp1, HeapOperand(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ Cbnz(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ B(slow_path->GetEntryLabel(), ne); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+ // So another temporary register allocated by the register
+ // allocator is used instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2285,25 +2509,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + vixl::aarch64::Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + Register tmp = temps.AcquireW(); + // Make sure `tmp` is not IP0, as it is clobbered by + // ReadBarrierMarkRegX entry points in + // ReadBarrierSystemArrayCopySlowPathARM64. + DCHECK_NE(LocationFrom(tmp).reg(), IP0); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `tmp`. + __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARM64* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&loop, ne); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl::aarch64::Label loop, done; + __ Bind(&loop); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + { + Register tmp = temps.AcquireW(); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + } + __ B(&loop); + __ Bind(&done); } - __ B(&loop); - __ Bind(&done); } // We only need one card marking on the destination array. codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void GenIsInfinite(LocationSummary* locations, diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 55e1ab2451..6e5eb6622b 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2456,16 +2456,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ FloorWS(FTMP, in); __ Mfc1(out, FTMP); - __ LoadConst32(TMP, 1); + if (!IsR6()) { + __ LoadConst32(TMP, -1); + } - // TMP = (out = java.lang.Integer.MAX_VALUE) ? 
1 : 0;
+ // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
__ LoadConst32(AT, std::numeric_limits<int32_t>::max());
__ Bne(AT, out, &finite);
__ Mtc1(ZERO, FTMP);
if (IsR6()) {
__ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColtS(in, FTMP);
}
@@ -2474,28 +2476,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
__ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? 1 : 0;
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
if (IsR6()) {
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColeS(half, FTMP);
}
__ Bind(&add);
- if (IsR6()) {
- __ Selnez(TMP, TMP, AT);
- } else {
+ if (!IsR6()) {
__ Movf(TMP, ZERO);
}
- // Return out += TMP.
- __ Addu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
__ Bind(&done);
}
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 22f4181b92..cf4a040551 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // In this code path, registers `temp1`, `temp2`, and `temp3`
+ // are not used for the base source address, the base
+ // destination address, and the end source address, as in
+ // other SystemArrayCopy intrinsic code paths. Instead they
+ // are used, respectively, for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
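The loop this slow path emits is easier to follow as plain C++. Here is a minimal, hedged sketch of the logic generated below, with hypothetical MaybeUnpoison/MaybePoison/Mark helpers standing in for the heap-poisoning macros and the ReadBarrierMarkRegX runtime entry point:

    #include <cstddef>
    #include <cstdint>

    using Ref = uint32_t;  // a (possibly poisoned) compressed heap reference

    // Hypothetical stand-ins, for illustration only. With heap poisoning
    // disabled the poison helpers are identities; Mark stands in for the
    // ReadBarrier::Mark runtime entry point.
    inline Ref MaybeUnpoison(Ref ref) { return ref; }
    inline Ref MaybePoison(Ref ref) { return ref; }
    Ref Mark(Ref ref);  // provided by the runtime

    // What the emitted slow-path loop computes: every source element is
    // unpoisoned, marked through the read barrier, re-poisoned and stored.
    void SlowPathCopy(const Ref* src_array, size_t src_pos,
                      Ref* dest_array, size_t dest_pos, size_t length) {
      for (size_t i = 0; i != length; ++i) {
        dest_array[dest_pos + i] =
            MaybePoison(Mark(MaybeUnpoison(src_array[src_pos + i])));
      }
    }

The generated code keeps the loop index in `temp1`, the loaded reference in `temp2`, and the destination index in `temp3`, exactly as the register-usage comment above describes.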
+ + // i = 0 + __ xorl(temp1, temp1); + NearLabel loop; + __ Bind(&loop); + // value = src_array[i + src_pos] + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset)); + } else { + __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset)); + } + __ MaybeUnpoisonHeapReference(temp2); + // TODO: Inline the mark bit check before calling the runtime? + // value = ReadBarrier::Mark(value) + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more + // explanations.) + DCHECK_NE(temp2, ESP); + DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(temp2); + // dest_array[i + dest_pos] = value + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2); + } else { + __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2); + } + // ++i + __ addl(temp1, Immediate(1)); + // if (i != length) goto loop + x86_codegen->GenerateIntCompare(temp1_loc, length); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { @@ -1835,10 +1934,9 @@ static void GenUnsafeGet(HInvoke* invoke, Register output = output_loc.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1901,11 +1999,6 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetOut(Location::RequiresRegister(), can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); - } } void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { @@ -2678,9 +2771,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) } void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2710,9 +2803,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2721,17 +2814,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); - Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Location length_arg = locations->InAt(4); + Location length = length_arg; + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2747,7 +2844,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2755,7 +2852,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2765,10 +2862,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = 
dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2777,16 +2874,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
}
@@ -2798,7 +2896,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2806,7 +2904,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2815,7 +2913,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2824,72 +2922,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
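In plain terms, the check these code paths implement is: a class describes a non-primitive array iff it has a component type and that component type is not a primitive type. A hedged sketch, with a hypothetical minimal mirror of the class layout used purely for illustration:

    #include <cstdint>

    // Hypothetical minimal mirror of the runtime class layout (illustration only).
    struct Class {
      Class* component_type_;    // null for non-array classes
      uint16_t primitive_type_;  // Primitive::kPrimNot (0) for reference types
    };

    constexpr uint16_t kPrimNot = 0;

    // The predicate evaluated before allowing the fast copy; a false result
    // corresponds to a branch to the intrinsic slow path.
    bool IsNonPrimitiveArrayClass(const Class* klass) {
      const Class* component = klass->component_type_;
      if (component == nullptr) {
        return false;  // not an array at all
      }
      return component->primitive_type_ == kPrimNot;  // array of references?
    }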
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ // Non read barrier code.
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); __ MaybeUnpoisonHeapReference(temp1); // /* HeapReference<Class> */ temp1 = temp1->component_type_ __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp1); - __ cmpl(Address(temp1, super_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); } - } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { - DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); - // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ testl(temp1, temp1); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp1); __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. + // Compute the base source address in `temp1`. int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); DCHECK_EQ(element_size, 4); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); @@ -2900,35 +3085,138 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // If it is needed (in the case of the fast-path loop), the base + // destination address is computed later, as `temp2` is used for + // intermediate computations. - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + if (length.IsStackSlot()) { + // Location `length` is again pointing at a stack slot, as + // register `temp3` (which was containing the length parameter + // earlier) has been overwritten; restore it now + DCHECK(length.Equals(length_arg)); + __ movl(temp3, Address(ESP, length.GetStackIndex())); + length = Location::RegisterLocation(temp3); + } + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. 
+ // for (size_t i = 0; i != length; ++i) { + // dest_array[dest_pos + i] = + // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i]))); + // } + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + NearLabel loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // goto slow_path; + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86 memory model. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + + // Set the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); } else { - __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); - } - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - NearLabel loop, done; - __ cmpl(temp1, temp3); - __ j(kEqual, &done); - __ Bind(&loop); - __ pushl(Address(temp1, 0)); - __ cfi().AdjustCFAOffset(4); - __ popl(Address(temp2, 0)); - __ cfi().AdjustCFAOffset(-4); - __ addl(temp1, Immediate(element_size)); - __ addl(temp2, Immediate(element_size)); - __ cmpl(temp1, temp3); - __ j(kNotEqual, &loop); - __ Bind(&done); + // Non read barrier code. + + // Compute the base destination address in `temp2`. 
+ if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -2937,7 +3225,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ab8b05c3d4..a4ee546237 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>(); + + __ Bind(GetEntryLabel()); + NearLabel loop; + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(src_curr_addr, 0)); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + // TODO: Inline the mark bit check before calling the runtime? 
+ // TMP = ReadBarrier::Mark(TMP); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(CpuRegister(TMP)); + __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP)); + __ addl(src_curr_addr, Immediate(element_size)); + __ addl(dst_curr_addr, Immediate(element_size)); + __ cmpl(src_curr_addr, src_stop_addr); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -1053,9 +1112,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1063,9 +1122,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1074,18 +1133,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); Location src_pos = locations->InAt(1); CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); - CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>(); + Location temp1_loc = locations->GetTemp(0); + CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>(); + Location temp2_loc = locations->GetTemp(1); + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + Location temp3_loc = locations->GetTemp(2); + CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>(); + Location TMP_loc = Location::RegisterLocation(TMP); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1101,7 +1165,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1109,7 +1173,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1119,10 +1183,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant)); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } else { __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } } @@ -1131,13 +1195,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. __ testl(src, src); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. 
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1146,7 +1210,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1154,7 +1218,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1163,7 +1227,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1172,38 +1236,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+ // unpoisoned by the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1211,34 +1317,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
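The `super_offset` comparison used here encodes the Object[] test: java.lang.Object is the only class with a null super class, so a destination whose component type has no super class is exactly an Object[], which may receive any reference. A hedged sketch, using a variant of the hypothetical Class mirror from the earlier example:

    // Hypothetical mirror fields, as in the earlier sketch (illustration only).
    struct Class {
      Class* super_class_;     // null only for java.lang.Object
      Class* component_type_;  // null for non-array classes
    };

    // When source and destination classes differ, the copy is still statically
    // safe if the destination is Object[]; per-element checks are not needed.
    bool DestinationAcceptsAnyObject(const Class* dest_klass) {
      const Class* component = dest_klass->component_type_;
      return component != nullptr && component->super_class_ == nullptr;
    }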
+ __ cmpl(Address(temp1, super_offset), Immediate(0)); + } else { + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); __ MaybeUnpoisonHeapReference(temp1); + // No need to unpoison the following heap reference load, as + // we're comparing against null. + __ cmpl(Address(temp1, super_offset), Immediate(0)); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ movl(temp1, Address(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ testl(temp1, temp1); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); __ Bind(&do_copy); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ TMP = temp1->component_type_ - __ movl(CpuRegister(TMP), Address(temp1, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + // No need to unpoison `TMP` now, as we're comparing against null. + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } // Compute base source address, base destination address, and end source address. @@ -1266,19 +1394,88 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. 
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr);
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr);
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // goto slow_path;
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path copying the array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // We have done the "if" of the gray bit check above; now branch based on the flags.
+ __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non-read-barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
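The `testb` emitted above checks a single bit of the object's lock word directly in memory. A stand-alone sketch of the same computation, assuming a 32-bit lock word with the read barrier state starting at bit 28 (the real constants live in ART's LockWord and may differ):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;  // assumed rb_state position
    constexpr uint32_t kBitsPerByte = 8;

    // Byte of the lock word that holds the gray bit, and the bit's position
    // within that byte; this is what the generated testb inspects.
    constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // byte 3
    constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // bit 4
    constexpr uint8_t test_value = 1u << gray_bit_position;

    // Equivalent of `testb(Address(obj, monitor_offset + gray_byte_position),
    // Immediate(test_value))`: true iff the low bit of rb_state is set,
    // i.e. the object is gray (white = 0, gray = 1, black = 2).
    bool IsGray(uint32_t lock_word) {
      uint8_t byte = static_cast<uint8_t>(lock_word >> (gray_byte_position * kBitsPerByte));
      return (byte & test_value) != 0;
    }

    int main() {
      assert(!IsGray(0u));                           // white: take the fast path
      assert(IsGray(1u << kReadBarrierStateShift));  // gray: take the slow path
      assert(!IsGray(2u << kReadBarrierStateShift)); // black: low bit clear
      return 0;
    }

Testing one byte rather than loading the whole word keeps the check to a single instruction whose flags survive until the branch, which is why the load fence (a no-op on x86-64) can sit between the test and the jump.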
+ NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -1287,7 +1484,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CpuRegister(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { @@ -1892,10 +2089,9 @@ static void GenUnsafeGet(HInvoke* invoke, case Primitive::kPrimNot: { if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1918,9 +2114,7 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, - HInvoke* invoke, - Primitive::Type type) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1934,30 +2128,25 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); - } } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index a6d234d739..8c0231e1aa 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -157,13 +157,26 @@ class OptimizingCFITest : public CFITest { TestImpl(isa, #isa, expected_asm, expected_cfi); \ } +#ifdef ART_ENABLE_CODEGEN_arm TEST_ISA(kThumb2) +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 TEST_ISA(kArm64) +#endif +#ifdef ART_ENABLE_CODEGEN_x86 TEST_ISA(kX86) +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 TEST_ISA(kX86_64) +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_ISA(kMips) +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_ISA(kMips64) +#endif +#ifdef ART_ENABLE_CODEGEN_arm TEST_F(OptimizingCFITest, kThumb2Adjust) { std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, @@ -184,7 +197,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { Finish(); Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_F(OptimizingCFITest, kMipsAdjust) { // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -212,7 +227,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Finish(); Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_F(OptimizingCFITest, kMips64Adjust) { // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -240,6 +257,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) { Finish(); Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); } +#endif #endif // ART_TARGET_ANDROID diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 698b0b6d43..f7c325ed93 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -428,8 +428,14 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Strip pass name suffix to get optimization name. 
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) { + size_t pos = pass_name.find(kPassNameSeparator); + return pos == std::string::npos ? pass_name : pass_name.substr(0, pos); +} + static HOptimization* BuildOptimization( - const std::string& opt_name, + const std::string& pass_name, ArenaAllocator* arena, HGraph* graph, OptimizingCompilerStats* stats, @@ -439,6 +445,7 @@ static HOptimization* BuildOptimization( StackHandleScopeCollection* handles, SideEffectsAnalysis* most_recent_side_effects, HInductionVarAnalysis* most_recent_induction) { + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); return new (arena) BoundsCheckElimination(graph, @@ -446,11 +453,11 @@ static HOptimization* BuildOptimization( most_recent_induction); } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) { CHECK(most_recent_side_effects != nullptr); - return new (arena) GVNOptimization(graph, *most_recent_side_effects); + return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str()); } else if (opt_name == HConstantFolding::kConstantFoldingPassName) { - return new (arena) HConstantFolding(graph); + return new (arena) HConstantFolding(graph, pass_name.c_str()); } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) { - return new (arena) HDeadCodeElimination(graph, stats); + return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str()); } else if (opt_name == HInliner::kInlinerPassName) { size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; return new (arena) HInliner(graph, // outer_graph @@ -470,7 +477,7 @@ static HOptimization* BuildOptimization( } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, stats); + return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { return new (arena) IntrinsicsRecognizer(graph, driver, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { @@ -522,12 +529,9 @@ static ArenaVector<HOptimization*> BuildOptimizations( SideEffectsAnalysis* most_recent_side_effects = nullptr; HInductionVarAnalysis* most_recent_induction = nullptr; ArenaVector<HOptimization*> ret(arena->Adapter()); - for (std::string pass_name : pass_names) { - size_t pos = pass_name.find(kPassNameSeparator); // Strip suffix to get base pass name. - std::string opt_name = pos == std::string::npos ? 
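The new ConvertPassNameToOptimizationName helper lets a suffixed pass name ride along into each optimization's constructor (so repeated passes stay distinguishable in dumps) while the dispatch above still matches on the base name. A self-contained usage sketch; the '$' separator and the example pass name are assumptions mirroring kPassNameSeparator, not taken from this diff:

    #include <cassert>
    #include <string>

    constexpr char kPassNameSeparator = '$';  // assumed separator value

    // Strip pass name suffix to get optimization name.
    std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
      size_t pos = pass_name.find(kPassNameSeparator);
      return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
    }

    int main() {
      // A suffixed pass name maps back to its base optimization name...
      assert(ConvertPassNameToOptimizationName("dead_code_elimination$initial")
             == "dead_code_elimination");
      // ...and an unsuffixed name is returned unchanged.
      assert(ConvertPassNameToOptimizationName("GVN") == "GVN");
      return 0;
    }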
pass_name : pass_name.substr(0, pos);
-
+ for (const std::string& pass_name : pass_names) {
HOptimization* opt = BuildOptimization(
- opt_name,
+ pass_name,
arena,
graph,
stats,
@@ -540,6 +544,7 @@
CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
ret.push_back(opt);
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index b73f73893c..6effc306dc 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -279,8 +279,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
const DexFile& dex_file = load_string->GetDexFile();
uint32_t string_index = load_string->GetStringIndex();
- bool is_in_dex_cache = false;
- HLoadString::LoadKind desired_load_kind;
+ HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
uint64_t address = 0u; // String or dex cache element address.
{
Runtime* runtime = Runtime::Current();
@@ -296,33 +295,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
DCHECK(!runtime->UseJitCompilation());
mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
CHECK(string != nullptr);
- if (!compiler_driver_->GetSupportBootImageFixup()) {
- // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
- } else {
- DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- is_in_dex_cache = true;
- desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
- ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
- : HLoadString::LoadKind::kBootImageLinkTimeAddress;
- }
+ // TODO: In follow-up CL, add PcRelative and Address back in.
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
mirror::String* string = dex_cache->GetResolvedString(string_index);
- is_in_dex_cache = (string != nullptr);
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Note: If the string is not in the dex cache, the instruction needs environment
- // and will not be inlined across dex files. Within a dex file, the slow-path helper
- // loads the correct string and inlined frames are used correctly for OOM stack trace.
- // TODO: Write a test for this. Bug: 29416588
- desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
- void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
- address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
}
} else {
// AOT app compilation. Try to look up the string without allocating if not found.
@@ -332,19 +312,9 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
!codegen_->GetCompilerOptions().GetCompilePic()) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
- // Use PC-relative load from the dex cache if the dex file belongs
- // to the oat file that we're currently compiling.
- desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
- ? HLoadString::LoadKind::kDexCachePcRelative
- : HLoadString::LoadKind::kDexCacheViaMethod;
}
}
}
- if (is_in_dex_cache) {
- load_string->MarkInDexCache();
- }
HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
switch (load_kind) {
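For reference, the string load-kind selection that remains after this change reduces to roughly the following decision. This is a simplified sketch with hypothetical boolean parameters, not the actual HSharpening API; the boot-image-compilation path stays at the default pending the follow-up CL mentioned above:

    #include <cassert>

    enum class LoadKind { kBootImageAddress, kDexCacheViaMethod };

    // use_jit: JIT compilation; in_boot_image: the resolved string lives in
    // the boot image space; compile_pic: AOT position-independent code.
    LoadKind SelectStringLoadKind(bool use_jit, bool in_boot_image, bool compile_pic) {
      // Default: load through the current ArtMethod's dex cache; always
      // correct, but the slowest option.
      LoadKind kind = LoadKind::kDexCacheViaMethod;
      if (in_boot_image && (use_jit || !compile_pic)) {
        // The string's address can be embedded directly in the generated code.
        kind = LoadKind::kBootImageAddress;
      }
      return kind;
    }

    int main() {
      assert(SelectStringLoadKind(true, true, false) == LoadKind::kBootImageAddress);
      // PIC app compilation cannot embed an absolute boot image address.
      assert(SelectStringLoadKind(false, true, true) == LoadKind::kDexCacheViaMethod);
      assert(SelectStringLoadKind(false, false, false) == LoadKind::kDexCacheViaMethod);
      return 0;
    }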