Jit Code Cache instruction pipeline flushing
Restores instruction pipeline flushing on all cores, fixing crashes seen
on ARMv7 with dual JIT code page mappings. We were inadvertently toggling
permissions on a non-executable page rather than on the executable one.
Removes the data cache flush for roots data and replaces it with a
sequentially consistent barrier.
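For reference, the two mechanisms roughly correspond to this standalone
sketch (illustrative only: it assumes a POSIX/Linux target and uses
placeholder names, whereas the change below uses ART's CHECKED_MPROTECT,
kProtAll/kProtCode and QuasiAtomic::ThreadFenceSequentiallyConsistent):

  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <sys/mman.h>

  constexpr size_t kPageSize = 4096;  // Assumed page size for the sketch.

  // Toggle the protection of a resident, executable (but never executed)
  // page. The kernel has to notify the other cores of the mapping change
  // (a TLB shootdown IPI), and the interrupt taken on each core drains
  // its instruction pipeline, discarding stale fetches of JIT code.
  void FlushInstructionPipelines(uint8_t* sync_page) {
    mprotect(sync_page, kPageSize, PROT_READ | PROT_WRITE | PROT_EXEC);
    // Touch the page so it is resident and the permission change really
    // forces a TLB update on the other cores.
    sync_page[0] = 0;
    mprotect(sync_page, kPageSize, PROT_READ | PROT_EXEC);
  }

  // Publish the freshly written root table with an ordinary sequentially
  // consistent fence instead of flushing the data cache for that range.
  void PublishRootsData() {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }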
Fixes MemMap::RemapAtEnd() when all pages are given out. To satisfy the
invariants checked in the destructor, the base pointer must be set to
nullptr when this happens.
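The invariant behind the MemMap fix can be sketched outside ART as
follows (ToyMap and its fields are placeholders rather than the real
MemMap API; on Linux, munmap() rejects a zero-length region with EINVAL,
which is why the base pointer has to be cleared once every page has been
handed out):

  #include <cstddef>
  #include <cstdio>
  #include <sys/mman.h>

  struct ToyMap {
    void* base_begin_ = nullptr;
    size_t base_size_ = 0u;

    ~ToyMap() {
      if (base_begin_ == nullptr) {
        // RemapAtEnd() gave out every page: nothing is left to unmap,
        // and munmap(addr, 0) would fail anyway.
        return;
      }
      if (munmap(base_begin_, base_size_) == -1) {
        std::perror("munmap failed");
      }
    }
  };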
Bug: 63833411
Bug: 62332932
Test: art/test.py --target
Change-Id: I705cf5a3c80e78c4e912ea3d2c3c4aa89dee26bb
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b76a0df..f4b67b2 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1210,14 +1210,14 @@
uint8_t* stack_map_data = nullptr;
uint8_t* method_info_data = nullptr;
uint8_t* roots_data = nullptr;
- uint32_t data_size = code_cache->ReserveData(self,
- stack_map_size,
- method_info_size,
- number_of_roots,
- method,
- &stack_map_data,
- &method_info_data,
- &roots_data);
+ code_cache->ReserveData(self,
+ stack_map_size,
+ method_info_size,
+ number_of_roots,
+ method,
+ &stack_map_data,
+ &method_info_data,
+ &roots_data);
if (stack_map_data == nullptr || roots_data == nullptr) {
return false;
}
@@ -1238,7 +1238,6 @@
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
code_allocator.GetSize(),
- data_size,
osr,
roots,
codegen->GetGraph()->HasShouldDeoptimizeFlag(),
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 27501b9..653fb87 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -121,6 +121,10 @@
return nullptr;
}
+ // Align both capacities to page size, as that's the unit mspaces use.
+ initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+ max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+
std::string error_str;
// Map name specific for android_os_Debug.cpp accounting.
// Map in low 4gb to simplify accessing root tables for x86_64.
@@ -142,22 +146,21 @@
return nullptr;
}
- // Align both capacities to page size, as that's the unit mspaces use.
- initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
- max_capacity = RoundDown(max_capacity, 2 * kPageSize);
-
// Create a region for JIT data and executable code. This will be
// laid out as:
//
// +----------------+ --------------------
- // :                : ^                  ^
+ // | code_sync_map_ | ^ code_sync_size   ^
+ // |                | v                  |
+ // +----------------+ --                 |
+ // :                : ^                  |
// :  post_code_map : | post_code_size   |
// :   [padding]    : v                  |
// +----------------+ -                  |
// |                | ^                  |
- // |    code_map    | | code_size        |
+ // |    code_map    | | code_size        | total_mapping_size
// |   [JIT Code]   | v                  |
- // +----------------+ -                  | total_mapping_size
+ // +----------------+ -                  |
// :                : ^                  |
// :  pre_code_map  : | pre_code_size    |
// :   [padding]    : v                  |
@@ -167,17 +170,23 @@
// |   [Jit Data]   | v                  v
// +----------------+ --------------------
//
+ // The code_sync_map_ contains a page that we use to flush CPU instruction
+ // pipelines (see FlushInstructionPipelines()).
+ //
// The padding regions - pre_code_map and post_code_map - exist to
// put some random distance between the writable JIT code mapping
// and the executable mapping. The padding is discarded at the end
// of this function.
- size_t total_mapping_size = kMaxMapSpacingPages * kPageSize;
- size_t data_size = RoundUp((max_capacity - total_mapping_size) / 2, kPageSize);
+ //
+ size_t data_size = (max_capacity - kMaxMapSpacingPages * kPageSize) / 2;
size_t pre_code_size =
- GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages) * kPageSize;
- size_t code_size = max_capacity - total_mapping_size - data_size;
- size_t post_code_size = total_mapping_size - pre_code_size;
- DCHECK_EQ(code_size + data_size + total_mapping_size, max_capacity);
+ GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages - 1) * kPageSize;
+ size_t code_size = max_capacity - data_size - kMaxMapSpacingPages * kPageSize;
+ size_t code_sync_size = kPageSize;
+ size_t post_code_size = kMaxMapSpacingPages * kPageSize - pre_code_size - code_sync_size;
+ DCHECK_EQ(data_size, code_size);
+ DCHECK_EQ(pre_code_size + post_code_size + code_sync_size, kMaxMapSpacingPages * kPageSize);
+ DCHECK_EQ(data_size + pre_code_size + code_size + post_code_size + code_sync_size, max_capacity);
// Create pre-code padding region after data region, discarded after
// code and data regions are set-up.
@@ -191,7 +200,7 @@
return nullptr;
}
DCHECK_EQ(data_map->Size(), data_size);
- DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size);
+ DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size + code_sync_size);
// Create code region.
unique_fd writable_code_fd;
@@ -206,7 +215,7 @@
return nullptr;
}
DCHECK_EQ(pre_code_map->Size(), pre_code_size);
- DCHECK_EQ(code_map->Size(), code_size + post_code_size);
+ DCHECK_EQ(code_map->Size(), code_size + post_code_size + code_sync_size);
// Padding after code region, discarded after code and data regions
// are set-up.
@@ -220,7 +229,19 @@
return nullptr;
}
DCHECK_EQ(code_map->Size(), code_size);
+ DCHECK_EQ(post_code_map->Size(), post_code_size + code_sync_size);
+
+ std::unique_ptr<MemMap> code_sync_map(SplitMemMap(post_code_map.get(),
+ "jit-code-sync",
+ post_code_size,
+ kProtCode,
+ error_msg,
+ use_ashmem));
+ if (code_sync_map == nullptr) {
+ return nullptr;
+ }
DCHECK_EQ(post_code_map->Size(), post_code_size);
+ DCHECK_EQ(code_sync_map->Size(), code_sync_size);
std::unique_ptr<MemMap> writable_code_map;
if (use_two_mappings) {
@@ -246,6 +267,7 @@
return new JitCodeCache(writable_code_map.release(),
code_map.release(),
data_map.release(),
+ code_sync_map.release(),
code_size,
data_size,
max_capacity,
@@ -255,6 +277,7 @@
JitCodeCache::JitCodeCache(MemMap* writable_code_map,
MemMap* executable_code_map,
MemMap* data_map,
+ MemMap* code_sync_map,
size_t initial_code_capacity,
size_t initial_data_capacity,
size_t max_capacity,
@@ -265,6 +288,7 @@
data_map_(data_map),
executable_code_map_(executable_code_map),
writable_code_map_(writable_code_map),
+ code_sync_map_(code_sync_map),
max_capacity_(max_capacity),
current_capacity_(initial_code_capacity + initial_data_capacity),
code_end_(initial_code_capacity),
@@ -382,7 +406,7 @@
class ScopedCodeCacheWrite : ScopedTrace {
public:
- explicit ScopedCodeCacheWrite(JitCodeCache* code_cache, bool only_for_tlb_shootdown = false)
+ explicit ScopedCodeCacheWrite(JitCodeCache* code_cache)
: ScopedTrace("ScopedCodeCacheWrite") {
ScopedTrace trace("mprotect all");
int prot_to_start_writing = kProtAll;
@@ -398,7 +422,7 @@
writable_map_ = code_cache->GetWritableMemMap();
- // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to
- // one page.
- size_ = only_for_tlb_shootdown ? kPageSize : writable_map_->Size();
+ size_ = writable_map_->Size();
CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_start_writing);
}
~ScopedCodeCacheWrite() {
@@ -424,7 +448,6 @@
size_t fp_spill_mask,
const uint8_t* code,
size_t code_size,
- size_t data_size,
bool osr,
Handle<mirror::ObjectArray<mirror::Object>> roots,
bool has_should_deoptimize_flag,
@@ -439,7 +462,6 @@
fp_spill_mask,
code,
code_size,
- data_size,
osr,
roots,
has_should_deoptimize_flag,
@@ -457,7 +479,6 @@
fp_spill_mask,
code,
code_size,
- data_size,
osr,
roots,
has_should_deoptimize_flag,
@@ -744,6 +765,20 @@
method->SetCounter(std::min(jit_warmup_threshold - 1, 1));
}
+static void FlushInstructionPipelines(uint8_t* sync_page) {
+ // After updating the JIT code cache we need to force all CPUs to
+ // flush their instruction pipelines. In the absence of a system call
+ // to do this explicitly, we can achieve this indirectly by toggling
+ // permissions on an executable page. This should send an IPI to
+ // each core to update the TLB entry, with the interrupt raised on
+ // each core causing its instruction pipeline to be flushed.
+ CHECKED_MPROTECT(sync_page, kPageSize, kProtAll);
+ // Ensure the sync_page is present; otherwise a TLB update may not be
+ // necessary.
+ sync_page[0] = 0;
+ CHECKED_MPROTECT(sync_page, kPageSize, kProtCode);
+}
+
#ifdef __aarch64__
static void FlushJitCodeCacheRange(uint8_t* code_ptr,
@@ -835,7 +870,6 @@
size_t fp_spill_mask,
const uint8_t* code,
size_t code_size,
- size_t data_size,
bool osr,
Handle<mirror::ObjectArray<mirror::Object>> roots,
bool has_should_deoptimize_flag,
@@ -878,6 +912,7 @@
code_size);
FlushJitCodeCacheRange(code_ptr, writable_ptr, code_size);
+ FlushInstructionPipelines(code_sync_map_->Begin());
DCHECK(!Runtime::Current()->IsAotCompiler());
if (has_should_deoptimize_flag) {
@@ -927,13 +962,10 @@
DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
DCHECK_LE(roots_data, stack_map);
FillRootTable(roots_data, roots);
- {
- // Flush data cache, as compiled code references literals in it.
- // We also need a TLB shootdown to act as memory barrier across cores.
- ScopedCodeCacheWrite ccw(this, /* only_for_tlb_shootdown */ true);
- FlushDataCache(reinterpret_cast<char*>(roots_data),
- reinterpret_cast<char*>(roots_data + data_size));
- }
+
+ // Ensure the updates to the root table are visible with a store fence.
+ QuasiAtomic::ThreadFenceSequentiallyConsistent();
+
method_code_map_.Put(code_ptr, method);
if (osr) {
number_of_osr_compilations_++;
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index a062ce4..175501f 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -113,7 +113,6 @@
size_t fp_spill_mask,
const uint8_t* code,
size_t code_size,
- size_t data_size,
bool osr,
Handle<mirror::ObjectArray<mirror::Object>> roots,
bool has_should_deoptimize_flag,
@@ -255,6 +254,7 @@
JitCodeCache(MemMap* code_map,
MemMap* data_map,
MemMap* writable_code_map,
+ MemMap* code_sync_map,
size_t initial_code_capacity,
size_t initial_data_capacity,
size_t max_capacity,
@@ -272,7 +272,6 @@
size_t fp_spill_mask,
const uint8_t* code,
size_t code_size,
- size_t data_size,
bool osr,
Handle<mirror::ObjectArray<mirror::Object>> roots,
bool has_should_deoptimize_flag,
@@ -383,6 +382,9 @@
std::unique_ptr<MemMap> executable_code_map_;
// Mem map which holds a non-executable view of code for JIT.
std::unique_ptr<MemMap> writable_code_map_;
+ // Mem map which holds one executable page that we use for flushing instruction
+ // fetch buffers. The code on this page is never executed.
+ std::unique_ptr<MemMap> code_sync_map_;
// The opaque mspace for allocating code.
void* code_mspace_ GUARDED_BY(lock_);
// The opaque mspace for allocating data.
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 17035dd..1ef72ba 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -497,7 +497,7 @@
MEMORY_TOOL_MAKE_UNDEFINED(base_begin_, base_size_);
int result = munmap(base_begin_, base_size_);
if (result == -1) {
- PLOG(FATAL) << "munmap failed";
+ PLOG(FATAL) << "munmap failed: " << BaseBegin() << "..." << BaseEnd();
}
}
@@ -561,6 +561,12 @@
size_ = new_end - reinterpret_cast<uint8_t*>(begin_);
base_size_ = new_base_end - reinterpret_cast<uint8_t*>(base_begin_);
DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
+ if (base_size_ == 0u) {
+ // All pages in this MemMap have been handed out. Invalidate base
+ // pointer to prevent the destructor from calling munmap() on a
+ // zero-length region (which can't succeed).
+ base_begin_ = nullptr;
+ }
size_t tail_size = old_end - new_end;
uint8_t* tail_base_begin = new_base_end;
size_t tail_base_size = old_base_end - new_base_end;