Trim arenas for JIT
Moved the arena pool into the runtime. Added arena trimming to the arena
pool: when called, it madvises the memory used by the free arenas.
Changed the JIT compiler to trim arenas after compilation. Arenas are now
MemMap-backed (kUseMemMap = true), and the arena mmap name was changed to
dalvik-LinearAlloc.
Native PSS before:
80353 kB: Native
80775 kB: Native
78116 kB: Native
After:
73357 kB: Native
70181 kB: Native
70306 kB: Native
Bug: 17950037
Bug: 17643507
Bug: 19264997
Change-Id: I63e7a898fd6e909c2c677fa57b5917a7b1398930
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 13a6d9d..02d74a0 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -628,12 +628,13 @@
DCHECK(driver->GetCompilerOptions().IsCompilationEnabled());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ Runtime* const runtime = Runtime::Current();
+ ClassLinker* const class_linker = runtime->GetClassLinker();
InstructionSet instruction_set = driver->GetInstructionSet();
if (instruction_set == kArm) {
instruction_set = kThumb2;
}
- CompilationUnit cu(driver->GetArenaPool(), instruction_set, driver, class_linker);
+ CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker);
CHECK((cu.instruction_set == kThumb2) ||
(cu.instruction_set == kArm64) ||
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d1291fa..78dd6cc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2412,8 +2412,9 @@
std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
std::ostringstream oss;
- const ArenaPool* arena_pool = GetArenaPool();
- gc::Heap* heap = Runtime::Current()->GetHeap();
+ Runtime* const runtime = Runtime::Current();
+ const ArenaPool* arena_pool = runtime->GetArenaPool();
+ gc::Heap* const heap = runtime->GetHeap();
oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
#ifdef HAVE_MALLOC_H
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f949667..28a8245 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -362,12 +362,6 @@
support_boot_image_fixup_ = support_boot_image_fixup;
}
- ArenaPool* GetArenaPool() {
- return &arena_pool_;
- }
- const ArenaPool* GetArenaPool() const {
- return &arena_pool_;
- }
SwapAllocator<void>& GetSwapSpaceAllocator() {
return *swap_space_allocator_.get();
}
@@ -606,9 +600,6 @@
void* compiler_context_;
- // Arena pool used by the compiler.
- ArenaPool arena_pool_;
-
bool support_boot_image_fixup_;
// DeDuplication data structures, these own the corresponding byte arrays.
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 04efa21..a63e14a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -103,7 +103,7 @@
}
bool JitCompiler::CompileMethod(Thread* self, mirror::ArtMethod* method) {
- uint64_t start_time = NanoTime();
+ const uint64_t start_time = NanoTime();
StackHandleScope<2> hs(self);
self->AssertNoPendingException();
Runtime* runtime = Runtime::Current();
@@ -130,6 +130,8 @@
}
}
CompiledMethod* compiled_method(compiler_driver_->CompileMethod(self, h_method.Get()));
+ // Trim maps to reduce memory usage. TODO: Measure how much this increases compile time.
+ runtime->GetArenaPool()->TrimMaps();
if (compiled_method == nullptr) {
return false;
}
@@ -137,7 +139,7 @@
// Don't add the method if we are supposed to be deoptimized.
bool result = false;
if (!runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
- const void* code = Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(
+ const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(
h_method.Get());
if (code != nullptr) {
// Already have some compiled code, just use this instead of linking.
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index e6380bf..70d138d 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -26,8 +26,9 @@
namespace art {
-// Memmap is a bit slower than malloc according to my measurements.
-static constexpr bool kUseMemMap = false;
+// Memmap is a bit slower than malloc to allocate, but this is mitigated by the arena pool which
+// only allocates a few arenas and recycles them afterwards.
+static constexpr bool kUseMemMap = true;
static constexpr bool kUseMemSet = true && kUseMemMap;
static constexpr size_t kValgrindRedZoneBytes = 8;
constexpr size_t Arena::kDefaultSize;
@@ -129,8 +130,8 @@
next_(nullptr) {
if (kUseMemMap) {
std::string error_msg;
- map_ = MemMap::MapAnonymous("dalvik-arena", nullptr, size, PROT_READ | PROT_WRITE, false, false,
- &error_msg);
+ map_ = MemMap::MapAnonymous("dalvik-LinearAlloc", nullptr, size, PROT_READ | PROT_WRITE, false,
+ false, &error_msg);
CHECK(map_ != nullptr) << error_msg;
memory_ = map_->Begin();
size_ = map_->Size();
@@ -148,8 +149,15 @@
}
}
+void Arena::Release() {  // Madvise a used, MemMap-backed arena to cut its memory footprint.
+ if (kUseMemMap && bytes_allocated_ > 0) {  // No-op without MemMap or if nothing was allocated.
+ map_->MadviseDontNeedAndZero();  // Presumably leaves the mapping reading as zeros - confirm.
+ bytes_allocated_ = 0;  // A later Reset() then skips its memset.
+ }
+}
+
void Arena::Reset() {
- if (bytes_allocated_) {
+ if (bytes_allocated_ > 0) {
if (kUseMemSet || !kUseMemMap) {
memset(Begin(), 0, bytes_allocated_);
} else {
@@ -162,6 +170,9 @@
ArenaPool::ArenaPool()
: lock_("Arena pool lock"),
free_arenas_(nullptr) {
+ if (kUseMemMap) {
+ MemMap::Init();
+ }
}
ArenaPool::~ArenaPool() {
@@ -189,6 +200,13 @@
return ret;
}
+void ArenaPool::TrimMaps() {  // Release() every arena currently on the free list.
+ MutexLock lock(Thread::Current(), lock_);  // Held for the whole walk of free_arenas_.
+ for (auto* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
+ arena->Release();  // Does nothing unless kUseMemMap is set and the arena has been used.
+ }
+}
+
size_t ArenaPool::GetBytesAllocated() const {
size_t total = 0;
MutexLock lock(Thread::Current(), lock_);
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 9237391..04ca3ea 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -118,7 +118,10 @@
static constexpr size_t kDefaultSize = 128 * KB;
explicit Arena(size_t size = kDefaultSize);
~Arena();
+ // Reset is for pre-use and uses memset for performance.
void Reset();
+ // Release is used in between uses and uses madvise to reduce memory usage.
+ void Release();
uint8_t* Begin() {
return memory_;
}
@@ -160,6 +163,9 @@
Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_);
+ // Trim the maps in arenas by madvising, used by JIT to reduce memory usage. This only works if
+ // kUseMemMap is true.
+ void TrimMaps() LOCKS_EXCLUDED(lock_);
private:
mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 70de0db..ac1040d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -251,6 +251,7 @@
VLOG(jit) << "Deleting jit";
jit_.reset(nullptr);
}
+ arena_pool_.reset();
// Shutdown the fault manager if it was initialized.
fault_manager.Shutdown();
@@ -787,6 +788,7 @@
max_spins_before_thin_lock_inflation_ =
runtime_options.GetOrDefault(Opt::MaxSpinsBeforeThinLockInflation);
+ arena_pool_.reset(new ArenaPool);
monitor_list_ = new MonitorList;
monitor_pool_ = MonitorPool::Create();
thread_list_ = new ThreadList;
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 5078b7f..4cddb5c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
#include "arch/instruction_set.h"
#include "base/allocator.h"
+#include "base/arena_allocator.h"
#include "base/macros.h"
#include "compiler_callbacks.h"
#include "gc_root.h"
@@ -545,6 +546,13 @@
void CreateJit();
+ ArenaPool* GetArenaPool() {  // Non-owning pointer to the pool held in arena_pool_.
+ return arena_pool_.get();  // nullptr whenever arena_pool_ is empty.
+ }
+ const ArenaPool* GetArenaPool() const {  // Const overload of the accessor above.
+ return arena_pool_.get();
+ }
+
private:
static void InitPlatformSignalHandlers();
@@ -608,6 +616,8 @@
gc::Heap* heap_;
+ std::unique_ptr<ArenaPool> arena_pool_;
+
// The number of spins that are done before thread suspension is used to forcibly inflate.
size_t max_spins_before_thin_lock_inflation_;
MonitorList* monitor_list_;