Make .bss stores atomic release operations.
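Rely on architecture-dependent ordering guarantees for the .bss
entry loads: aligned loads are implicitly atomic consume
operations on ARM and ARM64, and the x86 and x86-64 memory
models need no memory fence.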
This fixes theoretical races when one thread updates the
.bss entry and another uses it immediately thereafter;
previously we did not ensure correct memory visibility.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Change-Id: Ie7b7969eb355025b9c9205f8c936e702861943f4
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 651a3f7..bebeb7d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4100,6 +4100,7 @@
// Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
NewMethodBssEntryPatch(target_method, adrp_label);
+ // All aligned loads are implicitly atomic consume operations on ARM64.
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
@@ -4689,6 +4690,7 @@
vixl::aarch64::Label* ldr_label =
codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
+ // All aligned loads are implicitly atomic consume operations on ARM64.
codegen_->GenerateGcRootFieldLoad(cls,
out_loc,
temp,
@@ -4863,6 +4865,7 @@
vixl::aarch64::Label* ldr_label =
codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
+ // All aligned loads are implicitly atomic consume operations on ARM64.
codegen_->GenerateGcRootFieldLoad(load,
out_loc,
temp,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index ac09183..99645a4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7034,6 +7034,7 @@
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
+ // All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
generate_null_check = true;
break;
@@ -7260,6 +7261,7 @@
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
+ // All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
@@ -8730,6 +8732,7 @@
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
vixl32::Register temp_reg = RegisterFrom(temp);
EmitMovwMovtPlaceholder(labels, temp_reg);
+ // All aligned loads are implicitly atomic consume operations on ARM.
GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0);
break;
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d71b694..112d710 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4874,6 +4874,7 @@
temp.AsRegister<Register>());
__ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
RecordMethodBssEntryPatch(invoke);
+ // No need for memory fence, thanks to the x86 memory model.
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
@@ -6617,6 +6618,7 @@
Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+ // No need for memory fence, thanks to the x86 memory model.
generate_null_check = true;
break;
}
@@ -6814,6 +6816,7 @@
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ // No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index bdd080b..17edc74 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1015,6 +1015,7 @@
__ movq(temp.AsRegister<CpuRegister>(),
Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
RecordMethodBssEntryPatch(invoke);
+ // No need for memory fence, thanks to the x86-64 memory model.
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
@@ -5980,6 +5981,7 @@
Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+ // No need for memory fence, thanks to the x86-64 memory model.
generate_null_check = true;
break;
}
@@ -6133,6 +6135,7 @@
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ // No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);