Flush data cache after updating the JIT root table.

Otherwise, other cores could see an incomplete table when executing
the new code.

Bug: 37949368
Test: test.py --jit
Change-Id: I29fb62f4a13c6db509a1f5719b05ee9297a403ba
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 5ce5447..f8bb64c 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -192,19 +192,26 @@
 class ScopedCodeCacheWrite : ScopedTrace {
-  explicit ScopedCodeCacheWrite(MemMap* code_map)
+  explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false)
       : ScopedTrace("ScopedCodeCacheWrite"),
-        code_map_(code_map) {
+        code_map_(code_map),
+        only_for_tlb_shootdown_(only_for_tlb_shootdown) {
     ScopedTrace trace("mprotect all");
-    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtAll);
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll);
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode);
   MemMap* const code_map_;
+  // If we're using ScopedCodeCacheWrite only for TLB shootdown, we limit the scope of mprotect to
+  // one page.
+  const bool only_for_tlb_shootdown_;
@@ -565,11 +572,6 @@
-      DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
-      DCHECK_LE(roots_data, stack_map);
-      // Flush data cache, as compiled code references literals in it.
-      FlushDataCache(reinterpret_cast<char*>(roots_data),
-                     reinterpret_cast<char*>(roots_data + data_size));
       // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
       // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
       // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
@@ -621,10 +623,18 @@
     // possible that the compiled code is considered invalidated by some class linking,
     // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
-    method_code_map_.Put(code_ptr, method);
     // Fill the root table before updating the entry point.
     DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
+    DCHECK_LE(roots_data, stack_map);
     FillRootTable(roots_data, roots);
+    {
+      // Flush data cache, as compiled code references literals in it.
+      // We also need a TLB shootdown to act as a memory barrier across cores.
+      ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
+      FlushDataCache(reinterpret_cast<char*>(roots_data),
+                     reinterpret_cast<char*>(roots_data + data_size));
+    }
+    method_code_map_.Put(code_ptr, method);
     if (osr) {
       osr_code_map_.Put(method, code_ptr);