Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc          3
-rw-r--r--  compiler/optimizing/code_generator_arm.cc     65
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc   74
-rw-r--r--  compiler/optimizing/code_generator_mips.cc   162
-rw-r--r--  compiler/optimizing/code_generator_mips.h      1
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  21
-rw-r--r--  compiler/optimizing/code_generator_x86.cc    172
-rw-r--r--  compiler/optimizing/code_generator_x86.h       9
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc 177
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    6
-rw-r--r--  compiler/optimizing/codegen_test.cc           61
-rw-r--r--  compiler/optimizing/graph_visualizer.cc        5
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc        466
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc      463
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc        20
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc        502
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc     369
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc    18
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc    25
-rw-r--r--  compiler/optimizing/sharpening.cc             34
20 files changed, 1737 insertions(+), 916 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5152075499..c532e72465 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1228,7 +1228,8 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
instruction->IsCheckCast() ||
- (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+ (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 4c4128c5f8..6d9c55cd75 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -429,7 +429,8 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -441,6 +442,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
DCHECK_NE(reg, SP);
DCHECK_NE(reg, LR);
DCHECK_NE(reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary; it cannot be the entry point's input/output.
+ DCHECK_NE(reg, IP);
DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
// "Compact" slow path, saving two moves.
//
@@ -5585,55 +5589,15 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
__ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
- // a 128B range. To try and reduce the number of literals if we load multiple strings,
- // simply split the dex cache address to a 128B aligned base loaded from a literal
- // and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
- uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
- uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
- __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
- int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
- // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
- codegen_->AddSlowPath(slow_path);
- __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static int32_t GetExceptionTlsOffset() {
@@ -6413,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
+ // Slow path marking the GC root `root`.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -6522,7 +6486,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Introduce a dependency on the lock_word including the rb_state,
// which shall prevent load-load reordering without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp_reg.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
__ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
// The actual reference load.
@@ -6553,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
+ // Slow path marking the object `ref` when it is gray.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
AddSlowPath(slow_path);
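
The kDexCacheAddress handling removed above encoded part of an absolute address directly in the load instruction: a Thumb2 16-bit LDR carries a 5-bit immediate scaled by 4, so the address was split into a 128B-aligned base (deduplicated in the literal pool) and a 7-bit remainder folded into the load. The ARM64 variant below did the same with a 12-bit scaled immediate (16KiB range). A minimal standalone sketch of that arithmetic, using illustrative names rather than the real assembler API:

    #include <cassert>
    #include <cstdint>

    // Split an absolute GC-root address into a literal-pool base and an
    // immediate that fits the load's offset field. offset_bits is the
    // encoded immediate width plus the scale: 5 + 2 on Thumb2, 12 + 2 on
    // ARM64 for 32-bit loads.
    struct SplitAddress {
      uint32_t base;    // aligned base, loaded from a deduplicated literal
      uint32_t offset;  // remainder, folded into the load instruction
    };

    inline SplitAddress SplitForLoad(uint32_t address, unsigned offset_bits) {
      assert((address & 3u) == 0u);  // GC roots are 4 bytes and 4-byte aligned
      const uint32_t mask = (1u << offset_bits) - 1u;
      return SplitAddress{address & ~mask, address & mask};
    }

    // E.g. SplitForLoad(addr, 7): several strings inside one 128B window
    // share a single literal-pool entry, reducing the number of literals.
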
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d95e7df6b4..cc8985d0b0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -595,7 +595,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -607,7 +608,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
DCHECK_NE(obj_.reg(), WZR);
- // WIP0 is used by the slow path as a temp, it can not be the object register.
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary; it cannot be the entry point's input/output.
DCHECK_NE(obj_.reg(), IP0);
DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
// "Compact" slow path, saving two moves.
@@ -4195,7 +4197,6 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
}
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
- Location out_loc = load->GetLocations()->Out();
Register out = OutputRegister(load);
switch (load->GetLoadKind()) {
@@ -4231,63 +4232,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
- // that gives a 16KiB range. To try and reduce the number of literals if we load
- // multiple strings, simply split the dex cache address to a 16KiB aligned base
- // loaded from a literal and the remaining offset embedded in the load.
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
- uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
- uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
- __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- // Add ADRP with its PC-relative DexCache access patch.
- const DexFile& dex_file = load->GetDexFile();
- uint32_t element_offset = load->GetDexCacheElementOffset();
- vixl::aarch64::Label* adrp_label =
- codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
- {
- SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ Bind(adrp_label);
- __ adrp(out.X(), /* offset placeholder */ 0);
- }
- // Add LDR with its PC-relative DexCache access patch.
- vixl::aarch64::Label* ldr_label =
- codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
- // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = InputRegisterAt(load, 0);
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
- codegen_->AddSlowPath(slow_path);
- __ Cbz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -5088,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
+ // Slow path marking the GC root `root`.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
codegen_->AddSlowPath(slow_path);
@@ -5239,7 +5192,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
__ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
// The actual reference load.
@@ -5285,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Object* ref = ref_addr->AsMirrorPtr()
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
+ // Slow path marking the object `ref` when it is gray.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
AddSlowPath(slow_path);
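
The removed kDexCachePcRelative path paired an ADRP, which materializes a 4KiB-page-aligned PC-relative address, with an LDR whose 12-bit scaled immediate supplies the offset inside that page; both carried patch labels so the final displacement is filled in at link time. A hedged sketch of the address math the pair performs (plain functions, not the VIXL emission API):

    #include <cstdint>

    // What an ADRP/LDR pair computes on AArch64. ADRP clears the low 12
    // bits of the PC and adds a signed page delta; the LDR then adds the
    // in-page offset. The two NewPcRelativeDexCacheArrayPatch() labels in
    // the removed code marked exactly these two fixup sites.
    inline uint64_t AdrpResult(uint64_t pc, int64_t page_delta) {
      return (pc & ~UINT64_C(0xfff)) + (static_cast<uint64_t>(page_delta) << 12);
    }

    inline uint64_t LoadAddress(uint64_t page_base, uint32_t in_page_offset) {
      return page_base + (in_page_offset & 0xfffu);  // offset within the 4KiB page
    }
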
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 58879bc2f1..8a2f90d541 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1833,11 +1833,19 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
}
}
+auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
+ auto null_checker = [this, instruction]() {
+ this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+ };
+ return null_checker;
+}
+
void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register obj = locations->InAt(0).AsRegister<Register>();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction);
Primitive::Type type = instruction->GetType();
switch (type) {
@@ -1846,10 +1854,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
} else {
__ Addu(TMP, obj, index.AsRegister<Register>());
- __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1859,10 +1867,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
} else {
__ Addu(TMP, obj, index.AsRegister<Register>());
- __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1872,11 +1880,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1886,11 +1894,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1902,11 +1910,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(kLoadWord, out, obj, offset);
+ __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadWord, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1916,11 +1924,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1930,11 +1938,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadSFromOffset(out, obj, offset);
+ __ LoadSFromOffset(out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ LoadSFromOffset(out, TMP, data_offset);
+ __ LoadSFromOffset(out, TMP, data_offset, null_checker);
}
break;
}
@@ -1944,11 +1952,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadDFromOffset(out, obj, offset);
+ __ LoadDFromOffset(out, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ LoadDFromOffset(out, TMP, data_offset);
+ __ LoadDFromOffset(out, TMP, data_offset, null_checker);
}
break;
}
@@ -1957,7 +1965,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2004,6 +2011,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = locations->WillCall();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ auto null_checker = GetImplicitNullChecker(instruction);
switch (value_type) {
case Primitive::kPrimBoolean:
@@ -2013,10 +2021,10 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, obj, offset);
+ __ StoreToOffset(kStoreByte, value, obj, offset, null_checker);
} else {
__ Addu(TMP, obj, index.AsRegister<Register>());
- __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+ __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker);
}
break;
}
@@ -2028,11 +2036,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, obj, offset);
+ __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
__ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+ __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker);
}
break;
}
@@ -2045,14 +2053,13 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, obj, offset);
+ __ StoreToOffset(kStoreWord, value, obj, offset, null_checker);
} else {
DCHECK(index.IsRegister()) << index;
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+ __ StoreToOffset(kStoreWord, value, TMP, data_offset, null_checker);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (needs_write_barrier) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
codegen_->MarkGCCard(obj, value);
@@ -2075,11 +2082,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+ __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker);
}
break;
}
@@ -2091,11 +2098,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreSToOffset(value, obj, offset);
+ __ StoreSToOffset(value, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
__ Addu(TMP, obj, TMP);
- __ StoreSToOffset(value, TMP, data_offset);
+ __ StoreSToOffset(value, TMP, data_offset, null_checker);
}
break;
}
@@ -2107,11 +2114,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreDToOffset(value, obj, offset);
+ __ StoreDToOffset(value, obj, offset, null_checker);
} else {
__ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
__ Addu(TMP, obj, TMP);
- __ StoreDToOffset(value, TMP, data_offset);
+ __ StoreDToOffset(value, TMP, data_offset, null_checker);
}
break;
}
@@ -2120,11 +2127,6 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -3589,6 +3591,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction);
switch (type) {
case Primitive::kPrimBoolean:
@@ -3654,34 +3657,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
if (type == Primitive::kPrimLong) {
DCHECK(locations->Out().IsRegisterPair());
dst = locations->Out().AsRegisterPairLow<Register>();
- Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
- if (obj == dst) {
- __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ LoadFromOffset(kLoadWord, dst, obj, offset);
- } else {
- __ LoadFromOffset(kLoadWord, dst, obj, offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
- }
} else {
DCHECK(locations->Out().IsRegister());
dst = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(load_type, dst, obj, offset);
}
+ __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
} else {
DCHECK(locations->Out().IsFpuRegister());
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ LoadSFromOffset(dst, obj, offset);
+ __ LoadSFromOffset(dst, obj, offset, null_checker);
} else {
- __ LoadDFromOffset(dst, obj, offset);
+ __ LoadDFromOffset(dst, obj, offset, null_checker);
}
}
- // Longs are handled earlier.
- if (type != Primitive::kPrimLong) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
if (is_volatile) {
@@ -3729,6 +3718,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
StoreOperandType store_type = kStoreByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction);
switch (type) {
case Primitive::kPrimBoolean:
@@ -3800,28 +3790,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
if (type == Primitive::kPrimLong) {
DCHECK(locations->InAt(1).IsRegisterPair());
src = locations->InAt(1).AsRegisterPairLow<Register>();
- Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
- __ StoreToOffset(kStoreWord, src, obj, offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
} else {
DCHECK(locations->InAt(1).IsRegister());
src = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(store_type, src, obj, offset);
}
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
} else {
DCHECK(locations->InAt(1).IsFpuRegister());
FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ StoreSToOffset(src, obj, offset);
+ __ StoreSToOffset(src, obj, offset, null_checker);
} else {
- __ StoreDToOffset(src, obj, offset);
+ __ StoreDToOffset(src, obj, offset, null_checker);
}
}
- // Longs are handled earlier.
- if (type != Primitive::kPrimLong) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
// TODO: memory barriers?
@@ -4580,11 +4562,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
- // We need an extra register for PC-relative dex cache accesses.
- case HLoadString::LoadKind::kDexCachePcRelative:
- case HLoadString::LoadKind::kDexCacheViaMethod:
- base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
- break;
default:
base_or_current_method_reg = ZERO;
break;
@@ -4628,52 +4605,15 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
codegen_->DeduplicateBootImageAddressLiteral(address));
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
- DCHECK_ALIGNED(load->GetAddress(), 4u);
- int16_t offset = Low16Bits(address);
- uint32_t base_address = address - offset; // This accounts for offset sign extension.
- __ Lui(out, High16Bits(base_address));
- // /* GcRoot<mirror::String> */ out = *(base_address + offset)
- GenerateGcRootFieldLoad(load, out_loc, out, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase();
- int32_t offset =
- load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
- // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
- GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(load,
- out_loc,
- base_or_current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(load,
- out_loc,
- out,
- CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
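
GetImplicitNullChecker(), added above, threads a callback into the assembler's load/store helpers so that MaybeRecordImplicitNullCheck() runs immediately after the first instruction that can fault. That matters on MIPS, where a 64-bit access expands to two word accesses and only the first may trap on a null base. A toy sketch of the pattern, assuming simplified emitter types rather than the real MipsAssembler interface:

    #include <functional>

    using NullChecker = std::function<void()>;

    // Stand-in for the assembler's instruction emitter.
    void EmitLoadWord(int dst, int base, int offset) {
      (void)dst; (void)base; (void)offset;  // stub: would emit lw dst, offset(base)
    }

    // The helper invokes the callback right after the first (possibly
    // faulting) access, so the PC recorded for the implicit null check
    // matches the instruction that actually traps, even for wide loads.
    void LoadDoublewordFromOffset(int dst_lo, int dst_hi, int base, int offset,
                                  const NullChecker& null_checker = {}) {
      EmitLoadWord(dst_lo, base, offset);      // may fault on a null base
      if (null_checker) {
        null_checker();                        // record the faulting PC here
      }
      EmitLoadWord(dst_hi, base, offset + 4);  // base already checked above
    }
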
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 63a0345c1c..46810d658f 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -257,6 +257,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ auto GetImplicitNullChecker(HInstruction* instruction);
MipsAssembler* const assembler_;
CodeGeneratorMIPS* const codegen_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4e7a2728b1..4a5755c925 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3261,22 +3261,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
}
void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations = load->GetLocations();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
- __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
- ArtMethod::DeclaringClassOffset().Int32Value());
- __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- __ LoadFromOffset(
- kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- // TODO: We will need a read barrier here.
-
- if (!load->IsInDexCache()) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
- codegen_->AddSlowPath(slow_path);
- __ Beqzc(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7a561bb4ad..f50eb5cb7e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj)
- : SlowPathCode(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
+ : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -464,11 +464,16 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
+ if (unpoison_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(reg);
+ }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -498,6 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
private:
const Location obj_;
+ const bool unpoison_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};
@@ -1578,15 +1584,15 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) {
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs_reg, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
}
}
@@ -1619,7 +1625,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1728,7 +1734,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -4210,7 +4216,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
@@ -4630,10 +4636,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
// load the temp into the XMM and then copy the XMM into the
// output, 32 bits at a time).
locations->AddTemp(Location::RequiresFpuRegister());
- } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4677,11 +4679,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -5092,11 +5093,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
Location::kOutputOverlap :
Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -5171,11 +5167,10 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
@@ -6230,48 +6225,15 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
- // /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- Register base_reg = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
- // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */
- GenerateGcRootFieldLoad(
- load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- Register current_method = locations->InAt(0).AsRegister<Register>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code to do string dex cache lookup again.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static Address GetExceptionTlsAddress() {
@@ -6313,8 +6275,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- (kUseBakerReadBarrier ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ !kUseBakerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -6375,7 +6337,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6597,7 +6559,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -6633,8 +6595,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6673,8 +6634,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6706,8 +6666,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6715,8 +6674,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6907,17 +6865,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
Location maybe_temp) {
Register out_reg = out.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<Register>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6934,17 +6892,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction*
void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp) {
+ uint32_t offset) {
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6987,9 +6943,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root);
+ // Slow path marking the GC root `root`.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+ instruction, root, /* unpoison */ false);
codegen_->AddSlowPath(slow_path);
__ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
@@ -7023,14 +6979,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Location ref,
Register obj,
uint32_t offset,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -7038,7 +6993,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7051,14 +7005,13 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Address src = index.IsConstant() ?
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
const Address& src,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -7088,17 +7041,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// performance reasons.
Register ref_reg = ref.AsRegister<Register>();
- Register temp_reg = temp.AsRegister<Register>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ movl(temp_reg, Address(obj, monitor_offset));
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7106,25 +7065,20 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src);
+ __ movl(ref_reg, src); // Flags are unaffected.
+
+ // Note: reference unpoisoning modifies the flags, so it must be delayed until after the branch.
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+ instruction, ref, /* unpoison */ true);
+ AddSlowPath(slow_path);
+
+ // We have done the "if" of the gray bit check above, now branch based on the flags.
+ __ j(kNotZero, slow_path->GetEntryLabel());
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref);
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with SHR.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
- __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
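
The rewritten Baker read-barrier fast path above replaces loading the whole lock word into a temp and shifting out the rb_state with a single one-byte testb against the lock word, which both frees the temp register and preserves the flags across the reference load. A worked check of the byte/bit arithmetic, assuming a read-barrier-state shift of 28 for illustration (the real constant is LockWord::kReadBarrierStateShift):

    #include <cstdint>

    // With white=0 and gray=1, the low rb_state bit alone distinguishes
    // gray, so a one-byte TEST against the right byte of the 32-bit lock
    // word suffices; no temp register and no SHR are needed.
    constexpr uint32_t kReadBarrierStateShift = 28;  // assumed for the example
    constexpr uint32_t kBitsPerByte = 8;
    constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // 3
    constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // 4
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);     // 0x10

    static_assert(gray_byte_position == 3 && test_value == 0x10,
                  "testb at monitor_offset + 3 against 0x10 isolates the gray bit");

The unpoison flag added to ReadBarrierMarkSlowPathX86 exists for the same reason: unpoisoning clobbers the flags, so for the gray case it is deferred into the slow path, after the branch has consumed them.
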
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f306b33247..c644e401ff 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -254,8 +254,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp);
+ uint32_t offset);
// Generate a GC root reference load:
//
// root <- *address
@@ -295,7 +294,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
- void GenerateIntCompare(Location lhs, Location rhs);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -431,6 +429,8 @@ class CodeGeneratorX86 : public CodeGenerator {
Register value,
bool value_can_be_null);
+ void GenerateIntCompare(Location lhs, Location rhs);
+
void GenerateMemoryBarrier(MemBarrierKind kind);
Label* GetLabelOf(HBasicBlock* block) const {
@@ -486,7 +486,6 @@ class CodeGeneratorX86 : public CodeGenerator {
Location ref,
Register obj,
uint32_t offset,
- Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -495,7 +494,6 @@ class CodeGeneratorX86 : public CodeGenerator {
Register obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
@@ -503,7 +501,6 @@ class CodeGeneratorX86 : public CodeGenerator {
Location ref,
Register obj,
const Address& src,
- Location temp,
bool needs_null_check);
// Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cf01a791ee..ec37e5db22 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj)
- : SlowPathCode(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
+ : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -485,11 +485,16 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
+ if (unpoison_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
+ }
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
@@ -519,6 +524,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
private:
const Location obj_;
+ const bool unpoison_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};
@@ -4151,11 +4157,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
Location::RequiresRegister(),
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4199,11 +4200,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
case Primitive::kPrimNot: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -4587,11 +4587,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
- }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4666,11 +4661,10 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
} else {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
@@ -5635,53 +5629,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
- case HLoadString::LoadKind::kDexCacheAddress: {
- DCHECK_NE(load->GetAddress(), 0u);
- // /* GcRoot<mirror::String> */ out = *address
- if (IsUint<32>(load->GetAddress())) {
- Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
- GenerateGcRootFieldLoad(load, out_loc, address);
- } else {
- // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
- __ movq(out, Immediate(load->GetAddress()));
- GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
- }
- break;
- }
- case HLoadString::LoadKind::kDexCachePcRelative: {
- uint32_t offset = load->GetDexCacheElementOffset();
- Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
- Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ false);
- // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
- break;
- }
- case HLoadString::LoadKind::kDexCacheViaMethod: {
- CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- GenerateGcRootFieldLoad(
- load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
- // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
- __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
- // /* GcRoot<mirror::String> */ out = out[string_index]
- GenerateGcRootFieldLoad(
- load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- break;
- }
default:
- LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
- UNREACHABLE();
+ break;
}
- if (!load->IsInDexCache()) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ // TODO: Re-add the compiler code that does the string dex cache lookup.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
static Address GetExceptionTlsAddress() {
@@ -5724,8 +5680,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
return kEmitCompilerReadBarrier &&
- (kUseBakerReadBarrier ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ !kUseBakerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
type_check_kind == TypeCheckKind::kArrayObjectCheck);
}
@@ -5786,7 +5742,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6016,8 +5972,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -6041,8 +5996,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
@@ -6062,8 +6016,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6087,8 +6040,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// Walk over the class hierarchy to find a match.
NearLabel loop;
@@ -6114,8 +6066,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6134,8 +6085,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// Do an exact check.
NearLabel check_non_primitive_component_type;
@@ -6163,8 +6113,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6172,8 +6121,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6189,8 +6137,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(
- instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
// We always go into the type check slow path for the unresolved
// and interface check cases.
@@ -6358,17 +6305,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
Location maybe_temp) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
__ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ movl(out_reg, Address(out_reg, offset));
@@ -6385,17 +6332,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp) {
+ uint32_t offset) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6438,9 +6383,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
"art::mirror::CompressedReference<mirror::Object> and int32_t "
"have different sizes.");
- // Slow path used to mark the GC root `root`.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root);
+ // Slow path marking the GC root `root`.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+ instruction, root, /* unpoison */ false);
codegen_->AddSlowPath(slow_path);
__ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6475,14 +6420,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
Location ref,
CpuRegister obj,
uint32_t offset,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6490,7 +6434,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6503,14 +6446,13 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
Address src = index.IsConstant() ?
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
- GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}
void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
CpuRegister obj,
const Address& src,
- Location temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6540,17 +6482,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// performance reasons.
CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
- CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ movl(temp_reg, Address(obj, monitor_offset));
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6558,25 +6506,20 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// The actual reference load.
// /* HeapReference<Object> */ ref = *src
- __ movl(ref_reg, src);
+ __ movl(ref_reg, src); // Flags are unaffected.
+
+ // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+ instruction, ref, /* unpoison */ true);
+ AddSlowPath(slow_path);
+
+ // We have done the "if" of the gray bit check above; now branch based on the flags.
+ __ j(kNotZero, slow_path->GetEntryLabel());
// Object* ref = ref_addr->AsMirrorPtr()
__ MaybeUnpoisonHeapReference(ref_reg);
- // Slow path used to mark the object `ref` when it is gray.
- SlowPathCode* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref);
- AddSlowPath(slow_path);
-
- // if (rb_state == ReadBarrier::gray_ptr_)
- // ref = ReadBarrier::Mark(ref);
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with SHR.
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
- static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
- __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
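
The `testb` above replaces the old shift-based gray check: instead of loading the whole lock word into a temp and shifting the rb_state bit into the carry flag, it tests the single byte of the monitor word holding the gray bit, which is why the temp register could be dropped. A minimal sketch of the byte/bit arithmetic, assuming a hypothetical kReadBarrierStateShift value of 28 (the real constant lives in art::LockWord):

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kBitsPerByte = 8;
    constexpr uint32_t kReadBarrierStateShiftSketch = 28;  // assumed, for illustration

    constexpr uint32_t gray_byte_position = kReadBarrierStateShiftSketch / kBitsPerByte;
    constexpr uint32_t gray_bit_position = kReadBarrierStateShiftSketch % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1u << gray_bit_position);

    int main() {
      // testb [obj + monitor_offset + gray_byte_position], test_value
      // sets ZF iff the gray bit of rb_state is clear.
      std::printf("byte=%u bit=%u mask=0x%x\n",
                  gray_byte_position, gray_bit_position, test_value);
      return 0;
    }
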
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4e0e34ce38..44844ac67a 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,8 +248,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset,
- Location maybe_temp);
+ uint32_t offset);
// Generate a GC root reference load:
//
// root <- *address
@@ -427,7 +426,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location ref,
CpuRegister obj,
uint32_t offset,
- Location temp,
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
@@ -436,7 +434,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister obj,
uint32_t data_offset,
Location index,
- Location temp,
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
@@ -444,7 +441,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location ref,
CpuRegister obj,
const Address& src,
- Location temp,
bool needs_null_check);
// Generate a read barrier for a heap reference within `instruction`
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 18db507c48..fe6c0a305e 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,12 +29,6 @@
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
#include "dex_file.h"
@@ -52,10 +46,35 @@
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
#include "gtest/gtest.h"
namespace art {
+#ifdef ART_ENABLE_CODEGEN_arm
// Provide our own codegen, that ensures the C calling conventions
// are preserved. Currently, ART and C do not match as R4 is caller-save
// in ART, and callee-save in C. Alternatively, we could use or write
@@ -80,7 +99,9 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
blocked_register_pairs_[arm::R6_R7] = false;
}
};
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
public:
TestCodeGeneratorX86(HGraph* graph,
@@ -105,6 +126,7 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
blocked_register_pairs_[x86::ECX_EDI] = false;
}
};
+#endif
class InternalCodeAllocator : public CodeAllocator {
public:
@@ -234,37 +256,54 @@ static void RunCode(InstructionSet target_isa,
bool has_result,
Expected expected) {
CompilerOptions compiler_options;
+#ifdef ART_ENABLE_CODEGEN_arm
if (target_isa == kArm || target_isa == kThumb2) {
std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kArm64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ if (target_isa == kArm64) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ if (target_isa == kX86) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86_64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ if (target_isa == kX86_64) {
std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
X86_64InstructionSetFeatures::FromCppDefines());
x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ if (target_isa == kMips) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
MipsInstructionSetFeatures::FromCppDefines());
mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ if (target_isa == kMips64) {
std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
Mips64InstructionSetFeatures::FromCppDefines());
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
}
+#endif
}
static ::std::vector<InstructionSet> GetTargetISAs() {
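
The guard pattern above also suggests how GetTargetISAs() (whose opening appears in the trailing context) can enumerate only the compiled-in back ends. A hypothetical sketch under the same ART_ENABLE_CODEGEN_* defines, using a stand-in enum rather than the real art::InstructionSet:

    #include <vector>

    // Stand-in for art::InstructionSet; illustration only.
    enum InstructionSet { kArm, kThumb2, kArm64, kX86, kX86_64, kMips, kMips64 };

    static std::vector<InstructionSet> GetTargetISAsSketch() {
      std::vector<InstructionSet> isas;
    #ifdef ART_ENABLE_CODEGEN_arm
      isas.push_back(kArm);
      isas.push_back(kThumb2);
    #endif
    #ifdef ART_ENABLE_CODEGEN_arm64
      isas.push_back(kArm64);
    #endif
      // ...same pattern for the remaining ISAs.
      return isas;
    }
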
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 89d80cc281..b3d5341de0 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler {
new DisassemblerOptions(/* absolute_addresses */ false,
base_address,
end_address,
- /* can_read_literals */ true)));
+ /* can_read_literals */ true,
+ Is64BitInstructionSet(instruction_set)
+ ? &Thread::DumpThreadOffset<PointerSize::k64>
+ : &Thread::DumpThreadOffset<PointerSize::k32>)));
}
~HGraphVisualizerDisassembler() {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 27d9d48560..0bbc0e54bc 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,92 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+ Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+ Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // Compute the base destination address in `dst_curr_addr`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+ } else {
+ __ add(dst_curr_addr,
+ dest,
+ ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(dst_curr_addr, offset);
+ }
+
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+ __ MaybeUnpoisonHeapReference(tmp);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp = ReadBarrier::Mark(tmp);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp, SP);
+ DCHECK_NE(tmp, LR);
+ DCHECK_NE(tmp, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(src_curr_addr, IP);
+ DCHECK_NE(dst_curr_addr, IP);
+ DCHECK_NE(src_stop_addr, IP);
+ DCHECK_NE(tmp, IP);
+ DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(tmp);
+ __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+ __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+ __ b(&loop, NE);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
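
The per-register entry point lookup (GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp)) is what keeps this slow path cheap: each ReadBarrierMarkRegX thunk takes its argument in register X, so no argument shuffling is needed before the call. A hedged sketch of the offset computation, assuming the mark entry points form a contiguous table of pointer-sized slots indexed by register number (the layout and base offset here are illustrative, not the real Thread layout):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical: offset of pReadBarrierMarkRegN within Thread, assuming a
    // contiguous array of pointer-sized entry points starting at base_offset.
    constexpr int32_t MarkEntryPointOffsetSketch(int32_t base_offset,
                                                 size_t pointer_size,
                                                 int reg) {
      return base_offset + reg * static_cast<int32_t>(pointer_size);
    }

    static_assert(MarkEntryPointOffsetSketch(0x200, 4, 5) == 0x214,
                  "reg 5 lands five 32-bit slots past the table base");
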
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
@@ -1337,9 +1423,9 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1362,6 +1448,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM (because that register
+ // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+ // temporary register from the register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void CheckPosition(ArmAssembler* assembler,
@@ -1427,9 +1520,9 @@ static void CheckPosition(ArmAssembler* assembler,
}
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1438,18 +1531,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1465,7 +1562,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmp(src, ShifterOperand(dest));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
}
// Checked when building locations.
@@ -1477,7 +1574,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
- __ b(slow_path->GetEntryLabel(), GT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), GT);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1490,19 +1587,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
__ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
}
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1511,7 +1608,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ cmp(length.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
// Validity checks: source.
@@ -1519,7 +1616,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1528,7 +1625,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1537,112 +1634,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
- __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
- __ cmp(temp1, ShifterOperand(temp2));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Label do_copy;
- __ b(&do_copy, EQ);
- if (!did_unpoison) {
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
__ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ b(slow_path->GetEntryLabel(), NE);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp3` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ }
+ // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
__ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
-
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ // Compute the base source address in `temp1`.
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp1, src, element_size * constant + offset);
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+ __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
__ AddConstant(temp1, offset);
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
- __ AddConstant(temp2, offset);
- }
-
+ // Compute the end source address in `temp3`.
if (length.IsConstant()) {
int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp3, temp1, element_size * constant);
} else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+ __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop, done;
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1651,7 +1923,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
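
One subtlety worth calling out in the hunk above: `add(src, src, ShifterOperand(temp2, LSR, 32))` adds zero (ARM's LSR #32 shifts all 32 bits out), yet it makes `src` data-dependent on the just-loaded lock word, so the subsequent reference loads are ordered after the rb_state read without a dmb. A pseudo-C sketch of the idiom, widened to 64 bits so the shift is well defined in C++; it models the intent only (a C++ compiler is free to fold the addition away, unlike the emitted instruction):

    #include <cstdint>

    // Sketch only: models "add src, src, temp2, LSR #32".
    // The result is always src + 0, but the CPU must still compute it
    // after `monitor` is available, creating the load-load dependency.
    uint32_t AddZeroButDependent(uint32_t src, uint32_t monitor) {
      uint64_t wide = monitor;                         // LSR #32 of a 32-bit value
      return src + static_cast<uint32_t>(wide >> 32);  // == src + 0
    }
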
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 9cfe3ce569..91374b3108 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeARM64(instruction), tmp_(tmp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+ Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+ Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ Register tmp_reg = WRegisterFrom(tmp_);
+
+ __ Bind(GetEntryLabel());
+ vixl::aarch64::Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp_.reg(), LR);
+ DCHECK_NE(tmp_.reg(), WSP);
+ DCHECK_NE(tmp_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+ DCHECK_NE(tmp_.reg(), IP0);
+ DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+ __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&slow_copy_loop, ne);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
#undef __
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -2035,9 +2102,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
// We want to use two temporary registers in order to reduce the register pressure in arm64.
// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2090,12 +2157,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch register
+ // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+ // (because that register is clobbered by ReadBarrierMarkRegX
+ // entry points). Get an extra temporary register from the
+ // register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2104,6 +2179,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2111,10 +2187,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Location temp1_loc = LocationFrom(temp1);
Register temp2 = WRegisterFrom(locations->GetTemp(1));
+ Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
vixl::aarch64::Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2130,7 +2208,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ Cmp(src, dest);
- __ B(slow_path->GetEntryLabel(), eq);
+ __ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
DCHECK(!optimizations.GetDestinationIsSource()
@@ -2141,7 +2219,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(&conditions_on_positions_validated, ne);
}
__ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
- __ B(slow_path->GetEntryLabel(), gt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), gt);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2150,19 +2228,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
__ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
- __ B(slow_path->GetEntryLabel(), lt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), lt);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, slow_path->GetEntryLabel());
+ __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, slow_path->GetEntryLabel());
+ __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
}
// We have already checked in the LocationsBuilder for the constant case.
@@ -2170,17 +2248,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
// If the length is negative, bail out.
- __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+ __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
    // If the length >= 128, then (currently) prefer the native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
- __ B(slow_path->GetEntryLabel(), ge);
+ __ B(intrinsic_slow_path->GetEntryLabel(), ge);
}
// Validity checks: source.
CheckSystemArrayCopyPosition(masm,
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2189,90 +2267,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ // Note: Because it is acquired from VIXL's scratch register pool,
+    // `temp3` might be IP0, and thus cannot be used as the `ref` argument
+ // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
Register temp3 = temps.AcquireW();
+
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ Ldr(temp1, MemOperand(dest, class_offset));
- __ Ldr(temp2, MemOperand(src, class_offset));
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
+ __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
- __ Cmp(temp1, temp2);
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ Ldr(temp3, HeapOperand(temp1, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- vixl::aarch64::Label do_copy;
- __ B(&do_copy, eq);
- if (!did_unpoison) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ Ldr(temp3, HeapOperand(temp2, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ if (!did_unpoison) {
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ Ldr(temp1, HeapOperand(temp1, component_offset));
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ // No need to unpoison the result, we're comparing against null.
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Ldr(temp1, HeapOperand(temp1, component_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Ldr(temp1, HeapOperand(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ B(slow_path->GetEntryLabel(), ne);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+    // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+    // So another temporary register, allocated by the register
+    // allocator, is used instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2285,25 +2509,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+    //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ vixl::aarch64::Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+
Register tmp = temps.AcquireW();
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
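+    // This holds because `temp3`, acquired above, still owns IP0 at
+    // this point; with the default VIXL scratch pool (IP0, IP1), this
+    // second AcquireW therefore returns IP1.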
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
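+    // The shifted operand is always zero: `tmp` was written by a
+    // 32-bit Ldr, which zero-extends into the X view, so LSR #32
+    // yields 0. The Add thus leaves `src` unchanged while still
+    // creating an address dependency on the monitor load above.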
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
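+    // Bit kReadBarrierStateShift is the low bit of the rb_state
+    // field; per the static_asserts above, it is set exactly when
+    // the state is gray, so a single Tbnz suffices.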
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
__ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop, done;
+ __ Bind(&loop);
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ B(&loop);
+ __ Bind(&done);
}
- __ B(&loop);
- __ Bind(&done);
}
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void GenIsInfinite(LocationSummary* locations,
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 55e1ab2451..6e5eb6622b 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2456,16 +2456,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
__ FloorWS(FTMP, in);
__ Mfc1(out, FTMP);
- __ LoadConst32(TMP, 1);
+ if (!IsR6()) {
+ __ LoadConst32(TMP, -1);
+ }
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
+  // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
__ LoadConst32(AT, std::numeric_limits<int32_t>::max());
__ Bne(AT, out, &finite);
__ Mtc1(ZERO, FTMP);
if (IsR6()) {
__ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColtS(in, FTMP);
}
@@ -2474,28 +2476,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
__ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? 1 : 0;
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
if (IsR6()) {
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
} else {
__ ColeS(half, FTMP);
}
__ Bind(&add);
- if (IsR6()) {
- __ Selnez(TMP, TMP, AT);
- } else {
+ if (!IsR6()) {
__ Movf(TMP, ZERO);
}
- // Return out += TMP.
- __ Addu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
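+  // On R6, CmpLtS/CmpLeS above write an all-ones mask (-1) into FTMP
+  // when the condition holds, so after Mfc1 TMP is already -1 or 0;
+  // on R2, TMP is preloaded with -1 and Movf clears it when the FP
+  // condition flag is false. Subtracting -1 then rounds `out` up by
+  // one exactly when needed.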
__ Bind(&done);
}
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 22f4181b92..cf4a040551 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+    // In this code path, registers `temp1`, `temp2`, and `temp3`
+    // are not used for the base source address, the base destination
+    // address, and the end source address (respectively), as they are
+    // in other SystemArrayCopy intrinsic code paths. Instead they are
+    // used, in that same order, for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
+
+ // i = 0
+ __ xorl(temp1, temp1);
+ NearLabel loop;
+ __ Bind(&loop);
+ // value = src_array[i + src_pos]
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+ } else {
+ __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+ }
+ __ MaybeUnpoisonHeapReference(temp2);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // value = ReadBarrier::Mark(value)
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(temp2, ESP);
+ DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
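+    // There is one ReadBarrierMarkRegN entry point per core register;
+    // the offset is selected by the register holding the reference,
+    // and the entry point marks it in place (`temp2` is both the
+    // input and the output).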
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(temp2);
+ // dest_array[i + dest_pos] = value
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+ } else {
+ __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+ }
+ // ++i
+ __ addl(temp1, Immediate(1));
+ // if (i != length) goto loop
+ x86_codegen->GenerateIntCompare(temp1_loc, length);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -1835,10 +1934,9 @@ static void GenUnsafeGet(HInvoke* invoke,
Register output = output_loc.AsRegister<Register>();
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -1901,11 +1999,6 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
locations->SetOut(Location::RequiresRegister(),
can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
- }
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
@@ -2678,9 +2771,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1)
}
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2710,9 +2803,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2721,17 +2814,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
- Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location length_arg = locations->InAt(4);
+ Location length = length_arg;
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2747,7 +2844,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2755,7 +2852,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2765,10 +2862,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2777,16 +2874,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
@@ -2798,7 +2896,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2806,7 +2904,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2815,7 +2913,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2824,72 +2922,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+      // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ // Non read barrier code.
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
__ MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
__ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
__ MaybeUnpoisonHeapReference(temp1);
- __ cmpl(Address(temp1, super_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
}
- } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ // Compute the base source address in `temp1`.
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
DCHECK_EQ(element_size, 4);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
@@ -2900,35 +3085,138 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // If it is needed (in the case of the fast-path loop), the base
+ // destination address is computed later, as `temp2` is used for
+ // intermediate computations.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ if (length.IsStackSlot()) {
+          // Location `length` is again pointing at a stack slot, as
+          // register `temp3` (which contained the length parameter
+          // earlier) has been overwritten; restore it now.
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
+ }
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+    //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // for (size_t i = 0; i != length; ++i) {
+ // dest_array[dest_pos + i] =
+ // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+ // }
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
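+      // The 32-bit shift amount is split into a byte offset and an
+      // in-byte bit so that the check can be done with a byte-sized
+      // `testb` on the monitor word; the int8_t cast keeps the
+      // immediate within testb's 8-bit range.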
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // goto slow_path;
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // We have done the "if" of the gray bit check above, now branch based on the flags.
+ __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+
+ // Set the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
} else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ pushl(Address(temp1, 0));
- __ cfi().AdjustCFAOffset(4);
- __ popl(Address(temp2, 0));
- __ cfi().AdjustCFAOffset(-4);
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -2937,7 +3225,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ab8b05c3d4..a4ee546237 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ __ Bind(GetEntryLabel());
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ // TODO: Inline the mark bit check before calling the runtime?
+ // TMP = ReadBarrier::Mark(TMP);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(CpuRegister(TMP));
+ __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+ __ addl(src_curr_addr, Immediate(element_size));
+ __ addl(dst_curr_addr, Immediate(element_size));
+ __ cmpl(src_curr_addr, src_stop_addr);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1053,9 +1112,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1063,9 +1122,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  // SystemArrayCopy intrinsic is the Baker-style read barrier.
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1074,18 +1133,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Location src_pos = locations->InAt(1);
CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
- CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+ Location temp1_loc = locations->GetTemp(0);
+ CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = locations->GetTemp(1);
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ Location temp3_loc = locations->GetTemp(2);
+ CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+ Location TMP_loc = Location::RegisterLocation(TMP);
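+  // Note: TMP is the codegen's reserved scratch register; it is never
+  // handed out by the register allocator, so it can safely hold the
+  // component type references loaded below.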
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1101,7 +1165,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1109,7 +1173,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1119,10 +1183,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -1131,13 +1195,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1146,7 +1210,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1154,7 +1218,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1163,7 +1227,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1172,38 +1236,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+    // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+    // unpoisoned by the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1211,34 +1317,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+      // We do not need to emit a read barrier for the following
+      // heap reference load, as the reference loaded from
+      // `temp1->super_class_` is only compared against null below
+      // and is not kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ } else {
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
__ MaybeUnpoisonHeapReference(temp1);
+      // No need to unpoison the result of the following heap
+      // reference load, as we're comparing against null.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ movl(temp1, Address(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp1, temp1);
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ // No need to unpoison `TMP` now, as we're comparing against null.
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
// Compute base source address, base destination address, and end source address.
@@ -1266,19 +1394,88 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
+ constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
+ constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // goto slow_path;
+ // At this point, just do the "if" and make sure that flags are preserved until the branch.
+ __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // We have done the "if" of the gray bit check above; now branch based on the flags.
+ __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1287,7 +1484,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CpuRegister(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
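For reference, the single-byte `testb` in the hunk above relies on decomposing
LockWord::kReadBarrierStateShift into a byte offset and a bit-within-byte. A
minimal standalone sketch of that arithmetic, assuming for illustration a state
shift of 28 (the real constant lives in runtime/lock_word.h):

  #include <cassert>
  #include <cstdint>

  constexpr uint32_t kBitsPerByte = 8;
  constexpr uint32_t kReadBarrierStateShift = 28;  // assumed value, illustration only

  constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;  // byte 3
  constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;   // bit 4
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);     // 0x10

  int main() {
    // A lock word whose read barrier state is gray (low state bit set).
    uint32_t monitor = 1u << kReadBarrierStateShift;
    // The one-byte test at `monitor_offset + gray_byte_position` sees exactly
    // that bit, so the fast path needs no full-word load and no shifting.
    uint8_t byte_at_gray_pos = (monitor >> (gray_byte_position * kBitsPerByte)) & 0xff;
    assert((byte_at_gray_pos & test_value) != 0);
    return 0;
  }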
@@ -1892,10 +2089,9 @@ static void GenUnsafeGet(HInvoke* invoke,
case Primitive::kPrimNot: {
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, temp, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -1918,9 +2114,7 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
- HInvoke* invoke,
- Primitive::Type type) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1934,30 +2128,25 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
- if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
- }
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+ CreateIntIntIntToIntLocations(arena_, invoke);
}
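The dropped `Primitive::Type` parameter and temp register above follow from the
contract stated earlier in this change: the ReadBarrierMarkRegX entry points
save and restore all potentially live registers themselves, so the Unsafe.get*
fast path no longer has to reserve a scratch register across the call. A toy
model of that convention, with invented names (not the ART API), just to make
the register-pressure argument concrete:

  #include <array>
  #include <cstdint>

  using RegFile = std::array<uint64_t, 16>;

  // Model entry point: because the callee saves and restores everything it
  // might clobber, the caller needs no temp that survives the call.
  uint64_t MarkEntryPoint(RegFile& regs, uint64_t ref) {
    RegFile saved = regs;   // prologue: save all potentially live registers
    regs.fill(0);           // body may clobber freely...
    uint64_t marked = ref;  // stand-in for ReadBarrier::Mark(ref)
    regs = saved;           // epilogue: restore before returning
    return marked;
  }

  int main() {
    RegFile regs{};
    regs[3] = 42;  // a live value in some register
    (void)MarkEntryPoint(regs, 0x1000);
    return regs[3] == 42 ? 0 : 1;  // the live value survived the call
  }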
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index a6d234d739..8c0231e1aa 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -157,13 +157,26 @@ class OptimizingCFITest : public CFITest {
TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_ISA(kMips64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(OptimizingCFITest, kThumb2Adjust) {
std::vector<uint8_t> expected_asm(
expected_asm_kThumb2_adjust,
@@ -184,7 +197,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) {
Finish();
Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(OptimizingCFITest, kMipsAdjust) {
// One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -212,7 +227,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) {
Finish();
Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_F(OptimizingCFITest, kMips64Adjust) {
// One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -240,6 +257,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) {
Finish();
Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
}
+#endif
#endif // ART_TARGET_ANDROID
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 698b0b6d43..f7c325ed93 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -428,8 +428,14 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
|| instruction_set == kX86_64;
}
+// Strip pass name suffix to get optimization name.
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
+ size_t pos = pass_name.find(kPassNameSeparator);
+ return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
+}
+
static HOptimization* BuildOptimization(
- const std::string& opt_name,
+ const std::string& pass_name,
ArenaAllocator* arena,
HGraph* graph,
OptimizingCompilerStats* stats,
@@ -439,6 +445,7 @@ static HOptimization* BuildOptimization(
StackHandleScopeCollection* handles,
SideEffectsAnalysis* most_recent_side_effects,
HInductionVarAnalysis* most_recent_induction) {
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
return new (arena) BoundsCheckElimination(graph,
@@ -446,11 +453,11 @@ static HOptimization* BuildOptimization(
most_recent_induction);
} else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) {
CHECK(most_recent_side_effects != nullptr);
- return new (arena) GVNOptimization(graph, *most_recent_side_effects);
+ return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str());
} else if (opt_name == HConstantFolding::kConstantFoldingPassName) {
- return new (arena) HConstantFolding(graph);
+ return new (arena) HConstantFolding(graph, pass_name.c_str());
} else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) {
- return new (arena) HDeadCodeElimination(graph, stats);
+ return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str());
} else if (opt_name == HInliner::kInlinerPassName) {
size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_;
return new (arena) HInliner(graph, // outer_graph
@@ -470,7 +477,7 @@ static HOptimization* BuildOptimization(
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
- return new (arena) InstructionSimplifier(graph, stats);
+ return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
} else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
return new (arena) IntrinsicsRecognizer(graph, driver, stats);
} else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -522,12 +529,9 @@ static ArenaVector<HOptimization*> BuildOptimizations(
SideEffectsAnalysis* most_recent_side_effects = nullptr;
HInductionVarAnalysis* most_recent_induction = nullptr;
ArenaVector<HOptimization*> ret(arena->Adapter());
- for (std::string pass_name : pass_names) {
- size_t pos = pass_name.find(kPassNameSeparator); // Strip suffix to get base pass name.
- std::string opt_name = pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
-
+ for (const std::string& pass_name : pass_names) {
HOptimization* opt = BuildOptimization(
- opt_name,
+ pass_name,
arena,
graph,
stats,
@@ -540,6 +544,7 @@ static ArenaVector<HOptimization*> BuildOptimizations(
CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
ret.push_back(opt);
+ std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
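A standalone sketch of the suffix stripping introduced above. The '$' separator
and the example pass name are assumptions for illustration (the actual
kPassNameSeparator is defined elsewhere in optimizing_compiler.cc); the point is
that the prefix selects which optimization to construct, while the full pass
name is preserved so repeated runs of one optimization stay distinguishable in
timing and visualization output:

  #include <iostream>
  #include <string>

  constexpr char kPassNameSeparator = '$';  // assumed to match optimizing_compiler.cc

  // Mirrors ConvertPassNameToOptimizationName above.
  std::string StripSuffix(const std::string& pass_name) {
    size_t pos = pass_name.find(kPassNameSeparator);
    return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
  }

  int main() {
    // Hypothetical pass names, for illustration:
    std::cout << StripSuffix("dead_code_elimination$after_inlining") << "\n";  // dead_code_elimination
    std::cout << StripSuffix("inliner") << "\n";                               // inliner
    return 0;
  }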
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index b73f73893c..6effc306dc 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -279,8 +279,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
const DexFile& dex_file = load_string->GetDexFile();
uint32_t string_index = load_string->GetStringIndex();
- bool is_in_dex_cache = false;
- HLoadString::LoadKind desired_load_kind;
+ HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
uint64_t address = 0u; // String or dex cache element address.
{
Runtime* runtime = Runtime::Current();
@@ -296,33 +295,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
DCHECK(!runtime->UseJitCompilation());
mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
CHECK(string != nullptr);
- if (!compiler_driver_->GetSupportBootImageFixup()) {
- // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
- } else {
- DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
- is_in_dex_cache = true;
- desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
- ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
- : HLoadString::LoadKind::kBootImageLinkTimeAddress;
- }
+      // TODO: In a follow-up CL, add PcRelative and Address back in.
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
mirror::String* string = dex_cache->GetResolvedString(string_index);
- is_in_dex_cache = (string != nullptr);
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Note: If the string is not in the dex cache, the instruction needs environment
- // and will not be inlined across dex files. Within a dex file, the slow-path helper
- // loads the correct string and inlined frames are used correctly for OOM stack trace.
- // TODO: Write a test for this. Bug: 29416588
- desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
- void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
- address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
@@ -332,19 +312,9 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
!codegen_->GetCompilerOptions().GetCompilePic()) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
address = reinterpret_cast64<uint64_t>(string);
- } else {
- // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
- // Use PC-relative load from the dex cache if the dex file belongs
- // to the oat file that we're currently compiling.
- desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
- ? HLoadString::LoadKind::kDexCachePcRelative
- : HLoadString::LoadKind::kDexCacheViaMethod;
}
}
}
- if (is_in_dex_cache) {
- load_string->MarkInDexCache();
- }
HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
switch (load_kind) {
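To summarize the selection logic this change leaves in place: the conservative
default is now kDexCacheViaMethod, upgraded to kBootImageAddress only when the
resolved string is known to sit in the boot image and a direct address may be
embedded. A hedged sketch of that decision with assumed inputs (not the
HSharpening API; the real code also distinguishes boot-image, JIT, and AOT app
compilation, and JIT skips the PIC check):

  enum class LoadKind { kDexCacheViaMethod, kBootImageAddress };

  LoadKind ChooseStringLoadKind(bool resolved, bool in_boot_image, bool compile_pic) {
    if (resolved && in_boot_image && !compile_pic) {
      return LoadKind::kBootImageAddress;  // embed the string's address directly
    }
    // Always-correct fallback: load through the dex cache via the current
    // method. The PC-relative and link-time kinds removed in this hunk are
    // slated to return in a follow-up CL.
    return LoadKind::kDexCacheViaMethod;
  }

  int main() {
    return ChooseStringLoadKind(true, true, false) == LoadKind::kBootImageAddress ? 0 : 1;
  }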