Make .bss entry stores atomic release operations.

Rely on architecture-dependent memory ordering for the .bss entry
loads instead of emitting explicit barriers: aligned loads are
implicitly atomic consume operations on ARM and ARM64, and the
x86/x86-64 memory model requires no additional fence. This fixes a
theoretical race where one thread updates a .bss entry and another
thread uses it immediately thereafter; previously, correct memory
visibility of the published data was not guaranteed.
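
For illustration, a minimal sketch of the publish/consume pattern this
change relies on (Payload, bss_entry, PublishEntry and ReadEntryOrDefault
are hypothetical names for this sketch, not ART code):

  #include <atomic>
  #include <cstdint>

  // Hypothetical stand-in for a .bss entry that publishes a pointer.
  struct Payload {
    int32_t value;
  };

  std::atomic<Payload*> bss_entry{nullptr};

  // Writer side (what the runtime entrypoints now do): initialize the
  // object first, then publish it with a release store so those writes
  // become visible to any thread that later observes the pointer.
  void PublishEntry(Payload* p) {
    p->value = 42;
    bss_entry.store(p, std::memory_order_release);
  }

  // Reader side: the compiled code emits a plain aligned load and relies
  // on the hardware (address-dependency ordering on ARM/ARM64, the strong
  // x86/x86-64 memory model) instead of an explicit fence. Portable C++
  // needs at least memory_order_acquire to express the same guarantee,
  // so that is what this sketch uses.
  int32_t ReadEntryOrDefault() {
    Payload* p = bss_entry.load(std::memory_order_acquire);
    return (p != nullptr) ? p->value : -1;
  }
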
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Change-Id: Ie7b7969eb355025b9c9205f8c936e702861943f4
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 651a3f7..bebeb7d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4100,6 +4100,7 @@
// Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
NewMethodBssEntryPatch(target_method, adrp_label);
+ // All aligned loads are implicitly atomic consume operations on ARM64.
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
@@ -4689,6 +4690,7 @@
vixl::aarch64::Label* ldr_label =
codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
+ // All aligned loads are implicitly atomic consume operations on ARM64.
codegen_->GenerateGcRootFieldLoad(cls,
out_loc,
temp,
@@ -4863,6 +4865,7 @@
vixl::aarch64::Label* ldr_label =
codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
+ // All aligned loads are implicitly atomic consume operations on ARM64.
codegen_->GenerateGcRootFieldLoad(load,
out_loc,
temp,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index ac09183..99645a4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7034,6 +7034,7 @@
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
+ // All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
generate_null_check = true;
break;
@@ -7260,6 +7261,7 @@
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
+ // All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
@@ -8730,6 +8732,7 @@
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
vixl32::Register temp_reg = RegisterFrom(temp);
EmitMovwMovtPlaceholder(labels, temp_reg);
+ // All aligned loads are implicitly atomic consume operations on ARM.
GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0);
break;
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d71b694..112d710 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4874,6 +4874,7 @@
temp.AsRegister<Register>());
__ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
RecordMethodBssEntryPatch(invoke);
+ // No need for memory fence, thanks to the x86 memory model.
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
@@ -6617,6 +6618,7 @@
Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+ // No need for memory fence, thanks to the x86 memory model.
generate_null_check = true;
break;
}
@@ -6814,6 +6816,7 @@
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ // No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index bdd080b..17edc74 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1015,6 +1015,7 @@
__ movq(temp.AsRegister<CpuRegister>(),
Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
RecordMethodBssEntryPatch(invoke);
+ // No need for memory fence, thanks to the x86-64 memory model.
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
@@ -5980,6 +5981,7 @@
Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
+ // No need for memory fence, thanks to the x86-64 memory model.
generate_null_check = true;
break;
}
@@ -6133,6 +6135,7 @@
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ // No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index e939982..838b5b5 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -53,7 +53,10 @@
DCHECK_LT(slot, oat_file->GetBssGcRoots().data() + oat_file->GetBssGcRoots().size());
if (slot->IsNull()) {
// This may race with another thread trying to store the very same value but that's OK.
- *slot = GcRoot<mirror::Object>(object);
+ std::atomic<GcRoot<mirror::Object>>* atomic_slot =
+ reinterpret_cast<std::atomic<GcRoot<mirror::Object>>*>(slot);
+ static_assert(sizeof(*slot) == sizeof(*atomic_slot), "Size check");
+ atomic_slot->store(GcRoot<mirror::Object>(object), std::memory_order_release);
// We need a write barrier for the class loader that holds the GC roots in the .bss.
ObjPtr<mirror::ClassLoader> class_loader = outer_method->GetClassLoader();
Runtime* runtime = Runtime::Current();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 7ac76ab..e589fc9 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1416,7 +1416,10 @@
DCHECK_GE(method_entry, oat_file->GetBssMethods().data());
DCHECK_LT(method_entry,
oat_file->GetBssMethods().data() + oat_file->GetBssMethods().size());
- *method_entry = called;
+ std::atomic<ArtMethod*>* atomic_entry =
+ reinterpret_cast<std::atomic<ArtMethod*>*>(method_entry);
+ static_assert(sizeof(*method_entry) == sizeof(*atomic_entry), "Size check.");
+ atomic_entry->store(called, std::memory_order_release);
}
}
}