144 files changed, 2256 insertions, 1130 deletions
diff --git a/build/Android.common.mk b/build/Android.common.mk index 05224569b3..f58aabc0b5 100644 --- a/build/Android.common.mk +++ b/build/Android.common.mk @@ -173,22 +173,24 @@ $(error Required DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is not set) endif ART_TARGET_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) -# Enable thread-safety for GCC 4.6 on the target but not for GCC 4.7 where this feature was removed. +# Enable thread-safety for GCC 4.6, and clang, but not for GCC 4.7 or later where this feature was +# removed. Warn when -Wthread-safety is not used. ifneq ($(filter 4.6 4.6.%, $(TARGET_GCC_VERSION)),) ART_TARGET_CFLAGS += -Wthread-safety else - # Warn if not using GCC 4.6 for target builds when not doing a top-level or 'mma' build. - ifneq ($(ONE_SHOT_MAKEFILE),) - # Enable target GCC 4.6 with: export TARGET_GCC_VERSION_EXP=4.6 - $(info Using target GCC $(TARGET_GCC_VERSION) disables thread-safety checks.) + ifeq ($(ART_TARGET_CLANG),true) + ART_TARGET_CFLAGS += -Wthread-safety + else + # Warn if -Wthread-safety is not suport and not doing a top-level or 'mma' build. + ifneq ($(ONE_SHOT_MAKEFILE),) + # Enable target GCC 4.6 with: export TARGET_GCC_VERSION_EXP=4.6 + $(info Using target GCC $(TARGET_GCC_VERSION) disables thread-safety checks.) + endif endif endif -# We build with GCC 4.6 on the host. +# We compile with GCC 4.6 or clang on the host, both of which support -Wthread-safety. ART_HOST_CFLAGS += -Wthread-safety -# Make host builds easier to debug and profile by not omitting the frame pointer. -ART_HOST_CFLAGS += -fno-omit-frame-pointer - # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16" # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs diff --git a/compiler/Android.mk b/compiler/Android.mk index 499f23f6a5..fdc854016f 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -86,6 +86,7 @@ LIBART_COMPILER_SRC_FILES := \ utils/mips/managed_register_mips.cc \ utils/x86/assembler_x86.cc \ utils/x86/managed_register_x86.cc \ + utils/scoped_arena_allocator.cc \ buffered_output_stream.cc \ compiler_backend.cc \ elf_fixup.cc \ @@ -260,12 +261,6 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endef -ifeq ($(ART_BUILD_TARGET_NDEBUG),true) - $(eval $(call build-libart-compiler,target,ndebug)) -endif -ifeq ($(ART_BUILD_TARGET_DEBUG),true) - $(eval $(call build-libart-compiler,target,debug)) -endif ifeq ($(WITH_HOST_DALVIK),true) # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target. ifeq ($(ART_BUILD_NDEBUG),true) @@ -275,6 +270,12 @@ ifeq ($(WITH_HOST_DALVIK),true) $(eval $(call build-libart-compiler,host,debug)) endif endif +ifeq ($(ART_BUILD_TARGET_NDEBUG),true) + $(eval $(call build-libart-compiler,target,ndebug)) +endif +ifeq ($(ART_BUILD_TARGET_DEBUG),true) + $(eval $(call build-libart-compiler,target,debug)) +endif # Rule to build /system/lib/libcompiler_rt.a # Usually static libraries are not installed on the device. diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index bca72b872b..def7b681dc 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -35,9 +35,9 @@ namespace art { // A signal handler called when have an illegal instruction. We record the fact in // a global boolean and then increment the PC in the signal context to return to // the next instruction. 
We know the instruction is an sdiv (4 bytes long). -static void baddivideinst(int signo, siginfo *si, void *data) { - (void)signo; - (void)si; +static inline void baddivideinst(int signo, siginfo *si, void *data) { + UNUSED(signo); + UNUSED(si); struct ucontext *uc = (struct ucontext *)data; struct sigcontext *sc = &uc->uc_mcontext; sc->arm_r0 = 0; // set R0 to #0 to signal error @@ -56,7 +56,7 @@ static void baddivideinst(int signo, siginfo *si, void *data) { extern "C" bool CheckForARMSDIVInstruction(); -static InstructionSetFeatures GuessInstructionFeatures() { +static inline InstructionSetFeatures GuessInstructionFeatures() { InstructionSetFeatures f; // Uncomment this for processing of /proc/cpuinfo. @@ -107,7 +107,7 @@ static InstructionSetFeatures GuessInstructionFeatures() { // Given a set of instruction features from the build, parse it. The // input 'str' is a comma separated list of feature names. Parse it and // return the InstructionSetFeatures object. -static InstructionSetFeatures ParseFeatureList(std::string str) { +static inline InstructionSetFeatures ParseFeatureList(std::string str) { InstructionSetFeatures result; typedef std::vector<std::string> FeatureList; FeatureList features; diff --git a/compiler/dex/bit_vector_block_iterator.h b/compiler/dex/bit_vector_block_iterator.h index 0821e9e238..0f1c2b6756 100644 --- a/compiler/dex/bit_vector_block_iterator.h +++ b/compiler/dex/bit_vector_block_iterator.h @@ -44,7 +44,7 @@ class BitVectorBlockIterator { BasicBlock* Next(); void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(size, ArenaAllocator::kAllocGrowableArray); + return arena->Alloc(size, kArenaAllocGrowableArray); }; void operator delete(void* p) {} // Nop. diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index ee880417ac..c71f0473f1 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -25,6 +25,7 @@ #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "safe_map.h" +#include "utils/scoped_arena_allocator.h" #include "base/timing_logger.h" #include "utils/arena_allocator.h" @@ -82,6 +83,7 @@ struct CompilationUnit { // TODO: move memory management to mir_graph, or just switch to using standard containers. ArenaAllocator arena; + ArenaStack arena_stack; // Arenas for ScopedArenaAllocator. UniquePtr<MIRGraph> mir_graph; // MIR container. UniquePtr<Backend> cg; // Target-specific codegen. 
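The new arena_stack member added to CompilationUnit above backs the ScopedArenaAllocator that later hunks in this patch start using. A minimal sketch of the intended usage pattern, mirroring the call sites added in mir_analysis.cc and local_value_numbering.h further down; the function name and include paths here are assumptions for illustration, while Alloc() with an ArenaAllocKind tag and Adapter() for STL containers are taken from the patch itself:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <set>

#include "dex/compiler_ir.h"               // Assumed path for CompilationUnit.
#include "utils/scoped_arena_allocator.h"  // Header added by this patch.

namespace art {

// Illustrative only: shows the intended pattern, not code from the patch.
void ExampleScopedArenaUse(CompilationUnit* cu, size_t num_refs) {
  if (num_refs == 0u) {
    return;
  }
  // Stack-lifetime allocator drawing on the per-CompilationUnit ArenaStack;
  // everything it hands out is reclaimed when it goes out of scope.
  ScopedArenaAllocator allocator(&cu->arena_stack);

  // Raw allocations are tagged with an ArenaAllocKind for the memory stats.
  uint16_t* field_idxs = reinterpret_cast<uint16_t*>(
      allocator.Alloc(num_refs * sizeof(uint16_t), kArenaAllocMisc));
  field_idxs[0] = 0u;

  // STL containers can share the same scoped arena via Adapter(), as the new
  // SafeMap/std::set typedefs in LocalValueNumbering do.
  std::set<uint16_t, std::less<uint16_t>,
           ScopedArenaAllocatorAdapter<uint16_t> > value_names(
      std::less<uint16_t>(), allocator.Adapter());
  value_names.insert(field_idxs[0]);
}

}  // namespace art

The same pattern appears in DoCacheFieldLoweringInfo() below, which replaces the old stack-buffer-plus-heap-fallback logic with a single scoped-arena allocation sized from insns_size_in_code_units_.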
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index b55b4715eb..1c2d16f6ca 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -98,6 +98,7 @@ CompilationUnit::CompilationUnit(ArenaPool* pool) num_regs(0), compiler_flip_match(false), arena(pool), + arena_stack(pool), mir_graph(NULL), cg(NULL), timings("QuickCompiler", true, false) { @@ -247,9 +248,12 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) { - if (cu.arena.BytesAllocated() > (5 * 1024 *1024)) { - MemStats mem_stats(cu.arena); - LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); + if (cu.arena.BytesAllocated() > (1 * 1024 *1024) || + cu.arena_stack.PeakBytesAllocated() > 256 * 1024) { + MemStats mem_stats(cu.arena.GetMemStats()); + MemStats peak_stats(cu.arena_stack.GetPeakStats()); + LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats) + << Dumpable<MemStats>(peak_stats); } } diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index 348bedcc75..535b613ba1 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -18,6 +18,8 @@ #define ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_ #include "compiler_internals.h" +#include "UniquePtr.h" +#include "utils/scoped_arena_allocator.h" #define NO_VALUE 0xffff #define ARRAY_REF 0xfffe @@ -73,28 +75,26 @@ class LocalValueNumbering { }; // Key is s_reg, value is value name. - typedef SafeMap<uint16_t, uint16_t> SregValueMap; + typedef SafeMap<uint16_t, uint16_t, std::less<uint16_t>, + ScopedArenaAllocatorAdapter<std::pair<uint16_t, uint16_t> > > SregValueMap; // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name. - typedef SafeMap<uint64_t, uint16_t> ValueMap; + typedef SafeMap<uint64_t, uint16_t, std::less<uint64_t>, + ScopedArenaAllocatorAdapter<std::pair<uint64_t, uint16_t> > > ValueMap; // Key represents a memory address, value is generation. - typedef SafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator> MemoryVersionMap; + typedef SafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator, + ScopedArenaAllocatorAdapter<std::pair<MemoryVersionKey, uint16_t> > > MemoryVersionMap; // Maps field key to field id for resolved fields. - typedef SafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap; + typedef SafeMap<FieldReference, uint32_t, FieldReferenceComparator, + ScopedArenaAllocatorAdapter<std::pair<FieldReference, uint16_t> > > FieldIndexMap; + // A set of value names. 
+ typedef std::set<uint16_t, std::less<uint16_t>, + ScopedArenaAllocatorAdapter<uint16_t> > ValueNameSet; public: - explicit LocalValueNumbering(CompilationUnit* cu) - : cu_(cu), - sreg_value_map_(), - sreg_wide_value_map_(), - value_map_(), - next_memory_version_(1u), - global_memory_version_(0u), - memory_version_map_(), - field_index_map_(), - non_aliasing_refs_(), - null_checked_() { - std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); - std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); + static LocalValueNumbering* Create(CompilationUnit* cu) { + UniquePtr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack)); + void* addr = allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMisc); + return new(addr) LocalValueNumbering(cu, allocator.release()); } static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { @@ -167,7 +167,26 @@ class LocalValueNumbering { uint16_t GetValueNumber(MIR* mir); + // Allow delete-expression to destroy a LocalValueNumbering object without deallocation. + static void operator delete(void* ptr) { UNUSED(ptr); } + private: + LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) + : cu_(cu), + allocator_(allocator), + sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()), + sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()), + value_map_(std::less<uint64_t>(), allocator->Adapter()), + next_memory_version_(1u), + global_memory_version_(0u), + memory_version_map_(MemoryVersionKeyComparator(), allocator->Adapter()), + field_index_map_(FieldReferenceComparator(), allocator->Adapter()), + non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()), + null_checked_(std::less<uint16_t>(), allocator->Adapter()) { + std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); + std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); + } + uint16_t GetFieldId(const DexFile* dex_file, uint16_t field_idx); void AdvanceGlobalMemory(); uint16_t GetMemoryVersion(uint16_t base, uint16_t field, uint16_t type); @@ -179,6 +198,7 @@ class LocalValueNumbering { void HandlePutObject(MIR* mir); CompilationUnit* const cu_; + UniquePtr<ScopedArenaAllocator> allocator_; SregValueMap sreg_value_map_; SregValueMap sreg_wide_value_map_; ValueMap value_map_; @@ -189,8 +209,10 @@ class LocalValueNumbering { MemoryVersionMap memory_version_map_; FieldIndexMap field_index_map_; // Value names of references to objects that cannot be reached through a different value name. 
- std::set<uint16_t> non_aliasing_refs_; - std::set<uint16_t> null_checked_; + ValueNameSet non_aliasing_refs_; + ValueNameSet null_checked_; + + DISALLOW_COPY_AND_ASSIGN(LocalValueNumbering); }; } // namespace art diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index 4599612db6..ebac871b2d 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -120,7 +120,7 @@ class LocalValueNumberingTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, ArenaAllocator::kAllocMIR)); + mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); ssa_reps_.resize(count); for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; @@ -162,11 +162,16 @@ class LocalValueNumberingTest : public testing::Test { void PerformLVN() { value_names_.resize(mir_count_); for (size_t i = 0; i != mir_count_; ++i) { - value_names_[i] = lvn_.GetValueNumber(&mirs_[i]); + value_names_[i] = lvn_->GetValueNumber(&mirs_[i]); } } - LocalValueNumberingTest() : pool_(), cu_(&pool_), mir_count_(0u), mirs_(nullptr), lvn_(&cu_) { + LocalValueNumberingTest() + : pool_(), + cu_(&pool_), + mir_count_(0u), + mirs_(nullptr), + lvn_(LocalValueNumbering::Create(&cu_)) { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); } @@ -176,7 +181,7 @@ class LocalValueNumberingTest : public testing::Test { MIR* mirs_; std::vector<SSARepresentation> ssa_reps_; std::vector<uint16_t> value_names_; - LocalValueNumbering lvn_; + UniquePtr<LocalValueNumbering> lvn_; }; TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) { diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index d159f49b3e..667ee267ea 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -1095,16 +1095,15 @@ bool MIRGraph::SkipCompilation() { } void MIRGraph::DoCacheFieldLoweringInfo() { - // Try to use stack-allocated array, resort to heap if we exceed the initial size. - static constexpr size_t kInitialSize = 32; - uint16_t stack_idxs[kInitialSize]; - UniquePtr<uint16_t[]> allocated_idxs; - uint16_t* field_idxs = stack_idxs; - size_t size = kInitialSize; + // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN. + const uint32_t max_refs = (current_code_item_->insns_size_in_code_units_ - 1u) / 2u; + ScopedArenaAllocator allocator(&cu_->arena_stack); + uint16_t* field_idxs = + reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc)); // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end. size_t ifield_pos = 0u; - size_t sfield_pos = size; + size_t sfield_pos = max_refs; AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { if (bb->block_type != kDalvikByteCode) { @@ -1113,14 +1112,12 @@ void MIRGraph::DoCacheFieldLoweringInfo() { for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { if (mir->dalvikInsn.opcode >= Instruction::IGET && mir->dalvikInsn.opcode <= Instruction::SPUT_SHORT) { - bool need_alloc = false; const Instruction* insn = Instruction::At(current_code_item_->insns_ + mir->offset); - uint16_t field_idx; // Get field index and try to find it among existing indexes. If found, it's usually among // the last few added, so we'll start the search from ifield_pos/sfield_pos. 
Though this // is a linear search, it actually performs much better than map based approach. if (mir->dalvikInsn.opcode <= Instruction::IPUT_SHORT) { - field_idx = insn->VRegC_22c(); + uint16_t field_idx = insn->VRegC_22c(); size_t i = ifield_pos; while (i != 0u && field_idxs[i - 1] != field_idx) { --i; @@ -1129,44 +1126,18 @@ void MIRGraph::DoCacheFieldLoweringInfo() { mir->meta.ifield_lowering_info = i - 1; } else { mir->meta.ifield_lowering_info = ifield_pos; - if (UNLIKELY(ifield_pos == sfield_pos)) { - need_alloc = true; - } else { - field_idxs[ifield_pos++] = field_idx; - } + field_idxs[ifield_pos++] = field_idx; } } else { - field_idx = insn->VRegB_21c(); + uint16_t field_idx = insn->VRegB_21c(); size_t i = sfield_pos; - while (i != size && field_idxs[i] != field_idx) { + while (i != max_refs && field_idxs[i] != field_idx) { ++i; } - if (i != size) { - mir->meta.sfield_lowering_info = size - i - 1u; - } else { - mir->meta.sfield_lowering_info = size - sfield_pos; - if (UNLIKELY(ifield_pos == sfield_pos)) { - need_alloc = true; - } else { - field_idxs[--sfield_pos] = field_idx; - } - } - } - if (UNLIKELY(need_alloc)) { - DCHECK(field_idxs == stack_idxs); - // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN. - uint32_t max_refs = (current_code_item_->insns_size_in_code_units_ - 1u) / 2u; - allocated_idxs.reset(new uint16_t[max_refs]); - field_idxs = allocated_idxs.get(); - size_t sfield_count = size - sfield_pos; - sfield_pos = max_refs - sfield_count; - size = max_refs; - memcpy(field_idxs, stack_idxs, ifield_pos * sizeof(field_idxs[0])); - memcpy(field_idxs + sfield_pos, stack_idxs + ifield_pos, - sfield_count * sizeof(field_idxs[0])); - if (mir->dalvikInsn.opcode <= Instruction::IPUT_SHORT) { - field_idxs[ifield_pos++] = field_idx; + if (i != max_refs) { + mir->meta.sfield_lowering_info = max_refs - i - 1u; } else { + mir->meta.sfield_lowering_info = max_refs - sfield_pos; field_idxs[--sfield_pos] = field_idx; } } @@ -1186,16 +1157,16 @@ void MIRGraph::DoCacheFieldLoweringInfo() { ifield_lowering_infos_.GetRawStorage(), ifield_pos); } - if (sfield_pos != size) { + if (sfield_pos != max_refs) { // Resolve static field infos. 
DCHECK_EQ(sfield_lowering_infos_.Size(), 0u); - sfield_lowering_infos_.Resize(size - sfield_pos); - for (size_t pos = size; pos != sfield_pos;) { + sfield_lowering_infos_.Resize(max_refs - sfield_pos); + for (size_t pos = max_refs; pos != sfield_pos;) { --pos; sfield_lowering_infos_.Insert(MirSFieldLoweringInfo(field_idxs[pos])); } MirSFieldLoweringInfo::Resolve(cu_->compiler_driver, GetCurrentDexCompilationUnit(), - sfield_lowering_infos_.GetRawStorage(), size - sfield_pos); + sfield_lowering_infos_.GetRawStorage(), max_refs - sfield_pos); } } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 96804503fe..c3954fe3d7 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -955,10 +955,10 @@ void MIRGraph::DataFlowSSAFormat35C(MIR* mir) { mir->ssa_rep->num_uses = num_uses; mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); // NOTE: will be filled in during type & size inference pass mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); for (i = 0; i < num_uses; i++) { HandleSSAUse(mir->ssa_rep->uses, d_insn->arg[i], i); @@ -973,10 +973,10 @@ void MIRGraph::DataFlowSSAFormat3RC(MIR* mir) { mir->ssa_rep->num_uses = num_uses; mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); // NOTE: will be filled in during type & size inference pass mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); for (i = 0; i < num_uses; i++) { HandleSSAUse(mir->ssa_rep->uses, d_insn->vC+i, i); @@ -992,7 +992,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { mir->ssa_rep = static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation), - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; @@ -1042,9 +1042,9 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { if (num_uses) { mir->ssa_rep->num_uses = num_uses; mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); } int num_defs = 0; @@ -1059,9 +1059,9 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { if (num_defs) { mir->ssa_rep->num_defs = num_defs; mir->ssa_rep->defs = static_cast<int*>(arena_->Alloc(sizeof(int) * num_defs, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); mir->ssa_rep->fp_def = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_defs, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); } DecodedInstruction *d_insn = &mir->dalvikInsn; @@ -1110,7 +1110,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { */ bb->data_flow_info->vreg_to_ssa_map = static_cast<int*>(arena_->Alloc(sizeof(int) * cu_->num_dalvik_registers, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); memcpy(bb->data_flow_info->vreg_to_ssa_map, vreg_to_ssa_map_, sizeof(int) * cu_->num_dalvik_registers); @@ -1147,11 +1147,11 @@ void MIRGraph::CompilerInitializeSSAConversion() { */ vreg_to_ssa_map_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_dalvik_reg, - ArenaAllocator::kAllocDFInfo)); + 
kArenaAllocDFInfo)); /* Keep track of the higest def for each dalvik reg */ ssa_last_defs_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_dalvik_reg, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); for (unsigned int i = 0; i < num_dalvik_reg; i++) { vreg_to_ssa_map_[i] = i; @@ -1175,7 +1175,7 @@ void MIRGraph::CompilerInitializeSSAConversion() { bb->block_type == kExitBlock) { bb->data_flow_info = static_cast<BasicBlockDataFlow*>(arena_->Alloc(sizeof(BasicBlockDataFlow), - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); } } } diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc index 96eda01d1e..7c630e8229 100644 --- a/compiler/dex/mir_field_info.cc +++ b/compiler/dex/mir_field_info.cc @@ -24,7 +24,7 @@ #include "mirror/class_loader.h" // Only to allow casts in SirtRef<ClassLoader>. #include "mirror/dex_cache.h" // Only to allow casts in SirtRef<DexCache>. #include "scoped_thread_state_change.h" -#include "sirt_ref.h" +#include "sirt_ref-inl.h" namespace art { diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h index 41cb4cee14..e64e9fcf83 100644 --- a/compiler/dex/mir_field_info.h +++ b/compiler/dex/mir_field_info.h @@ -100,7 +100,7 @@ class MirFieldInfo { class MirIFieldLoweringInfo : public MirFieldInfo { public: // For each requested instance field retrieve the field's declaring location (dex file, class - // index and field index) and volatility and compute the whether we can fast path the access + // index and field index) and volatility and compute whether we can fast path the access // with IGET/IPUT. For fast path fields, retrieve the field offset. static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit, MirIFieldLoweringInfo* field_infos, size_t count) @@ -143,7 +143,7 @@ class MirIFieldLoweringInfo : public MirFieldInfo { class MirSFieldLoweringInfo : public MirFieldInfo { public: // For each requested static field retrieve the field's declaring location (dex file, class - // index and field index) and volatility and compute the whether we can fast path the access with + // index and field index) and volatility and compute whether we can fast path the access with // IGET/IPUT. For fast path fields (at least for IGET), retrieve the information needed for // the field access, i.e. the field offset, whether the field is in the same class as the // method being compiled, whether the declaring class can be safely assumed to be initialized diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 46e854fb2b..868730fc37 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -411,7 +411,7 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs /* create */ true, /* immed_pred_block_p */ &cur_block); SuccessorBlockInfo *successor_block_info = static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo), - ArenaAllocator::kAllocSuccessor)); + kArenaAllocSuccessor)); successor_block_info->block = case_block->id; successor_block_info->key = (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? 
@@ -459,7 +459,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse catches_.insert(catch_block->start_offset); } SuccessorBlockInfo *successor_block_info = reinterpret_cast<SuccessorBlockInfo*> - (arena_->Alloc(sizeof(SuccessorBlockInfo), ArenaAllocator::kAllocSuccessor)); + (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); successor_block_info->block = catch_block->id; successor_block_info->key = iterator.GetHandlerTypeIndex(); cur_block->successor_blocks->Insert(successor_block_info); @@ -518,7 +518,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; new_block->predecessors->Insert(cur_block->id); - MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), ArenaAllocator::kAllocMIR)); + MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); *new_insn = *insn; insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); @@ -602,7 +602,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ /* Parse all instructions and put them into containing basic blocks */ while (code_ptr < code_end) { - MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), ArenaAllocator::kAllocMIR)); + MIR *insn = static_cast<MIR *>(arena_->Alloc(sizeof(MIR), kArenaAllocMIR)); insn->offset = current_offset_; insn->m_unit_index = current_method_; int width = ParseInsn(code_ptr, &insn->dalvikInsn); @@ -1042,7 +1042,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { str.append("]--optimized away"); } int length = str.length() + 1; - ret = static_cast<char*>(arena_->Alloc(length, ArenaAllocator::kAllocDFInfo)); + ret = static_cast<char*>(arena_->Alloc(length, kArenaAllocDFInfo)); strncpy(ret, str.c_str(), length); return ret; } @@ -1157,7 +1157,7 @@ void MIRGraph::DumpMIRGraph() { CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range) { CallInfo* info = static_cast<CallInfo*>(arena_->Alloc(sizeof(CallInfo), - ArenaAllocator::kAllocMisc)); + kArenaAllocMisc)); MIR* move_result_mir = FindMoveResult(bb, mir); if (move_result_mir == NULL) { info->result.location = kLocInvalid; @@ -1167,7 +1167,7 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, } info->num_arg_words = mir->ssa_rep->num_uses; info->args = (info->num_arg_words == 0) ? NULL : static_cast<RegLocation*> - (arena_->Alloc(sizeof(RegLocation) * info->num_arg_words, ArenaAllocator::kAllocMisc)); + (arena_->Alloc(sizeof(RegLocation) * info->num_arg_words, kArenaAllocMisc)); for (int i = 0; i < info->num_arg_words; i++) { info->args[i] = GetRawSrc(mir, i); } @@ -1182,7 +1182,7 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, // Allocate a new basic block. BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { BasicBlock* bb = static_cast<BasicBlock*>(arena_->Alloc(sizeof(BasicBlock), - ArenaAllocator::kAllocBB)); + kArenaAllocBB)); bb->block_type = block_type; bb->id = block_id; // TUNING: better estimate of the exit block predecessors? 
@@ -1196,7 +1196,7 @@ BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { void MIRGraph::InitializeConstantPropagation() { is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false); - constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), ArenaAllocator::kAllocDFInfo)); + constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), kArenaAllocDFInfo)); } void MIRGraph::InitializeMethodUses() { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 1eb9ef9bef..85d6d894b0 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -457,7 +457,7 @@ class MIRGraph { void EnableOpcodeCounting() { opcode_count_ = static_cast<int*>(arena_->Alloc(kNumPackedOpcodes * sizeof(int), - ArenaAllocator::kAllocMisc)); + kArenaAllocMisc)); } void ShowOpcodeStats(); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 243452e968..03fc091e4d 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -245,7 +245,7 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) } CompilerTemp *compiler_temp = static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); // Create the type of temp requested. Special temps need special handling because // they have a specific virtual register assignment. @@ -313,7 +313,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { bool use_lvn = bb->use_lvn; UniquePtr<LocalValueNumbering> local_valnum; if (use_lvn) { - local_valnum.reset(new LocalValueNumbering(cu_)); + local_valnum.reset(LocalValueNumbering::Create(cu_)); } while (bb != NULL) { for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { @@ -479,7 +479,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { DCHECK_EQ(SelectKind(if_true), kSelectMove); DCHECK_EQ(SelectKind(if_false), kSelectMove); int* src_ssa = - static_cast<int*>(arena_->Alloc(sizeof(int) * 3, ArenaAllocator::kAllocDFInfo)); + static_cast<int*>(arena_->Alloc(sizeof(int) * 3, kArenaAllocDFInfo)); src_ssa[0] = mir->ssa_rep->uses[0]; src_ssa[1] = if_true->ssa_rep->uses[0]; src_ssa[2] = if_false->ssa_rep->uses[0]; @@ -488,14 +488,14 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } mir->ssa_rep->num_defs = 1; mir->ssa_rep->defs = - static_cast<int*>(arena_->Alloc(sizeof(int) * 1, ArenaAllocator::kAllocDFInfo)); + static_cast<int*>(arena_->Alloc(sizeof(int) * 1, kArenaAllocDFInfo)); mir->ssa_rep->fp_def = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * 1, ArenaAllocator::kAllocDFInfo)); + static_cast<bool*>(arena_->Alloc(sizeof(bool) * 1, kArenaAllocDFInfo)); mir->ssa_rep->fp_def[0] = if_true->ssa_rep->fp_def[0]; // Match type of uses to def. 
mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * mir->ssa_rep->num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); for (int i = 0; i < mir->ssa_rep->num_uses; i++) { mir->ssa_rep->fp_use[i] = mir->ssa_rep->fp_def[0]; } @@ -878,7 +878,7 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { void MIRGraph::DumpCheckStats() { Checkstats* stats = - static_cast<Checkstats*>(arena_->Alloc(sizeof(Checkstats), ArenaAllocator::kAllocDFInfo)); + static_cast<Checkstats*>(arena_->Alloc(sizeof(Checkstats), kArenaAllocDFInfo)); checkstats_ = stats; AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index f426055068..0fce5bbb3d 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -50,12 +50,12 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, } // Add the table to the list - we'll process it later SwitchTable *tab_rec = - static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + kArenaAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -99,12 +99,12 @@ void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, } // Add the table to the list - we'll process it later SwitchTable *tab_rec = - static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR)); + static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -152,7 +152,7 @@ void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add the table to the list - we'll process it later FillArrayData *tab_rec = - static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); + static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint16_t width = tab_rec->table[1]; diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index ab1a053489..01d669b90c 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -554,13 +554,13 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); reg_pool_->num_core_regs = num_regs; reg_pool_->core_regs = reinterpret_cast<RegisterInfo*> - (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), ArenaAllocator::kAllocRegAlloc)); + (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), kArenaAllocRegAlloc)); reg_pool_->num_fp_regs = num_fp_regs; reg_pool_->FPRegs = 
static_cast<RegisterInfo*> - (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), ArenaAllocator::kAllocRegAlloc)); + (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), kArenaAllocRegAlloc)); CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); // Keep special registers from being allocated diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 14469b61c3..34d3834682 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -358,7 +358,7 @@ LIR* Mir2Lir::ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) { LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) { /* Add the constant to the literal pool */ if (constant_list_p) { - LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocData)); + LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData)); new_value->operands[0] = value; new_value->next = *constant_list_p; *constant_list_p = new_value; @@ -829,7 +829,7 @@ LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { LIR* res = boundary_lir; if (cu_->verbose) { // Only pay the expense if we're pretty-printing. - LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); new_label->dalvik_offset = vaddr; new_label->opcode = kPseudoCaseLabel; new_label->operands[0] = keyVal; diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index cb424d9169..7423393e13 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -18,7 +18,6 @@ #include "base/macros.h" #include "base/mutex.h" #include "base/mutex-inl.h" -#include "locks.h" #include "thread.h" #include "thread-inl.h" #include "dex/mir_graph.h" diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index b4d8dd6009..4aff01c066 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -23,7 +23,6 @@ #include "safe_map.h" #include "dex/compiler_enums.h" #include "dex_file.h" -#include "locks.h" namespace art { diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index 7a2dce13dc..6df91e674a 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -248,7 +248,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { /* Only sink store instructions */ if (sink_distance && !is_this_lir_load) { LIR* new_store_lir = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); *new_store_lir = *this_lir; /* * Stop point found - insert *before* the check_lir @@ -445,7 +445,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { if (slot >= 0) { LIR* cur_lir = prev_inst_list[slot]; LIR* new_load_lir = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); *new_load_lir = *this_lir; /* * Insertion is guaranteed to succeed since check_lir diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index 88f46fd59a..234299e472 100644 --- a/compiler/dex/quick/mips/call_mips.cc 
+++ b/compiler/dex/quick/mips/call_mips.cc @@ -68,12 +68,12 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, } // Add the table to the list - we'll process it later SwitchTable* tab_rec = - static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int elements = table[1]; tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), ArenaAllocator::kAllocLIR)); + static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.Insert(tab_rec); // The table is composed of 8-byte key/disp pairs @@ -146,12 +146,12 @@ void MipsMir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, } // Add the table to the list - we'll process it later SwitchTable* tab_rec = - static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + kArenaAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -226,7 +226,7 @@ void MipsMir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { // Add the table to the list - we'll process it later FillArrayData* tab_rec = reinterpret_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), - ArenaAllocator::kAllocData)); + kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint16_t width = tab_rec->table[1]; diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 85c250da0f..4f495ee0fc 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -467,13 +467,13 @@ void MipsMir2Lir::CompilerInitializeRegAlloc() { int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); reg_pool_->num_core_regs = num_regs; reg_pool_->core_regs = static_cast<RegisterInfo*> - (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), ArenaAllocator::kAllocRegAlloc)); + (arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), kArenaAllocRegAlloc)); reg_pool_->num_fp_regs = num_fp_regs; reg_pool_->FPRegs = static_cast<RegisterInfo*> - (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), ArenaAllocator::kAllocRegAlloc)); + (arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), kArenaAllocRegAlloc)); CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); // Keep special registers from being allocated diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index c2d12f6481..8b1f81d47f 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -45,7 +45,7 @@ inline void Mir2Lir::ClobberBody(RegisterInfo* p) { inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0, int op1, int op2, int op3, int op4, LIR* target) { - LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), 
kArenaAllocLIR)); insn->dalvik_offset = dalvik_offset; insn->opcode = opcode; insn->operands[0] = op0; diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index d9b241e864..40ed5ef535 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1066,7 +1066,7 @@ void Mir2Lir::MethodMIR2LIR() { // Hold the labels of each block. block_label_list_ = static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(), - ArenaAllocator::kAllocLIR)); + kArenaAllocLIR)); PreOrderDfsIterator iter(mir_graph_); BasicBlock* curr_bb = iter.Next(); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 9e0e29995e..6955577670 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -311,7 +311,7 @@ class Mir2Lir : public Backend { virtual void Compile() = 0; static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(size, ArenaAllocator::kAllocData); + return arena->Alloc(size, kArenaAllocData); } protected: @@ -363,7 +363,7 @@ class Mir2Lir : public Backend { // strdup(), but allocates from the arena. char* ArenaStrdup(const char* str) { size_t len = strlen(str) + 1; - char* res = reinterpret_cast<char*>(arena_->Alloc(len, ArenaAllocator::kAllocMisc)); + char* res = reinterpret_cast<char*>(arena_->Alloc(len, kArenaAllocMisc)); if (res != NULL) { strncpy(res, str, len); } diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 3a8942e46e..3cb6fd01c1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -907,7 +907,7 @@ void Mir2Lir::DoPromotion() { const int promotion_threshold = 1; // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp promotion_map_ = static_cast<PromotionMap*> - (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc)); + (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), kArenaAllocRegAlloc)); // Allow target code to add any special registers AdjustSpillMask(); @@ -925,10 +925,10 @@ void Mir2Lir::DoPromotion() { */ RefCounts *core_regs = static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs, - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); RefCounts *FpRegs = static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2, - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); // Set ssa names for original Dalvik registers for (int i = 0; i < dalvik_regs; i++) { core_regs[i].s_reg = FpRegs[i].s_reg = i; diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index c92d2bb730..577f216f5e 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -69,12 +69,12 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, } // Add the table to the list - we'll process it later SwitchTable* tab_rec = - static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + kArenaAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -134,7 +134,7 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add 
the table to the list - we'll process it later FillArrayData* tab_rec = - static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); + static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint16_t width = tab_rec->table[1]; diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 78a216923f..083fccb2b4 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -457,15 +457,15 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); reg_pool_ = static_cast<RegisterPool*>(arena_->Alloc(sizeof(*reg_pool_), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); reg_pool_->num_core_regs = num_regs; reg_pool_->core_regs = static_cast<RegisterInfo*>(arena_->Alloc(num_regs * sizeof(*reg_pool_->core_regs), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); reg_pool_->num_fp_regs = num_fp_regs; reg_pool_->FPRegs = static_cast<RegisterInfo *>(arena_->Alloc(num_fp_regs * sizeof(*reg_pool_->FPRegs), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); // Keep special registers from being allocated diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 4e258ef7c7..8091528809 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -144,7 +144,7 @@ void MIRGraph::ComputeDefBlockMatrix() { /* Allocate num_dalvik_registers bit vector pointers */ def_block_matrix_ = static_cast<ArenaBitVector**> (arena_->Alloc(sizeof(ArenaBitVector *) * num_registers, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); int i; /* Initialize num_register vectors with num_blocks bits each */ @@ -384,7 +384,7 @@ void MIRGraph::ComputeDominators() { /* Initalize & Clear i_dom_list */ if (i_dom_list_ == NULL) { i_dom_list_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_reachable_blocks, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); } for (int i = 0; i < num_reachable_blocks; i++) { i_dom_list_[i] = NOTVISITED; @@ -565,7 +565,7 @@ void MIRGraph::InsertPhiNodes() { continue; } MIR *phi = - static_cast<MIR*>(arena_->Alloc(sizeof(MIR), ArenaAllocator::kAllocDFInfo)); + static_cast<MIR*>(arena_->Alloc(sizeof(MIR), kArenaAllocDFInfo)); phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); phi->dalvikInsn.vA = dalvik_reg; phi->offset = phi_bb->start_offset; @@ -593,13 +593,13 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { size_t num_uses = bb->predecessors->Size(); mir->ssa_rep->num_uses = num_uses; int* uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); mir->ssa_rep->uses = uses; mir->ssa_rep->fp_use = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, ArenaAllocator::kAllocDFInfo)); + static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, kArenaAllocDFInfo)); BasicBlockId* incoming = static_cast<BasicBlockId*>(arena_->Alloc(sizeof(BasicBlockId) * num_uses, - ArenaAllocator::kAllocDFInfo)); + kArenaAllocDFInfo)); mir->meta.phi_incoming = incoming; int idx = 0; while (true) { @@ -629,7 +629,7 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { /* Save SSA map 
snapshot */ int* saved_ssa_map = - static_cast<int*>(arena_->Alloc(map_size, ArenaAllocator::kAllocDalvikToSSAMap)); + static_cast<int*>(arena_->Alloc(map_size, kArenaAllocDalvikToSSAMap)); memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); if (block->fall_through != NullBasicBlockId) { diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index aa0e72a5ca..257e70ce93 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -19,6 +19,7 @@ #include <vector> +#include "base/mutex.h" #include "method_reference.h" #include "safe_map.h" diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index 4d2c05166b..876973625d 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -410,7 +410,7 @@ void MIRGraph::InitRegLocations() { /* Allocate the location map */ int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps(); RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc), - ArenaAllocator::kAllocRegAlloc)); + kArenaAllocRegAlloc)); for (int i = 0; i < GetNumSSARegs(); i++) { loc[i] = fresh_loc; loc[i].s_reg_low = i; diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index d401398ca4..1499ae4872 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -25,6 +25,7 @@ #include "mirror/dex_cache.h" #include "mirror/art_field-inl.h" #include "scoped_thread_state_change.h" +#include "sirt_ref-inl.h" namespace art { diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index fc22addbf1..d3d58c919f 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -49,6 +49,7 @@ #include "mirror/throwable.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" +#include "sirt_ref-inl.h" #include "thread.h" #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 80a6796a4e..ac70e5aee0 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -52,6 +52,7 @@ struct InlineIGetIPutData; class OatWriter; class ParallelCompilationManager; class ScopedObjectAccess; +template<class T> class SirtRef; class TimingLogger; class VerificationResults; class VerifiedMethod; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 2b3af6281f..949fade906 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -30,6 +30,7 @@ #include "mirror/dex_cache-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "sirt_ref-inl.h" namespace art { diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h index 3610d1a8b2..03b965acf7 100644 --- a/compiler/elf_writer.h +++ b/compiler/elf_writer.h @@ -23,6 +23,7 @@ #include <vector> #include "base/macros.h" +#include "base/mutex.h" #include "os.h" namespace art { diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 74be604c31..f4b507a64a 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -50,7 +50,7 @@ #include "object_utils.h" #include "runtime.h" #include "scoped_thread_state_change.h" -#include "sirt_ref.h" +#include "sirt_ref-inl.h" #include "UniquePtr.h" #include "utils.h" diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 5394bbc42e..ffd7b417e3 100644 --- a/compiler/oat_writer.cc +++ 
b/compiler/oat_writer.cc @@ -33,6 +33,7 @@ #include "output_stream.h" #include "safe_map.h" #include "scoped_thread_state_change.h" +#include "sirt_ref-inl.h" #include "verifier/method_verifier.h" namespace art { diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index 21245db70d..cb5aa273bd 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -20,7 +20,6 @@ #include <stdint.h> #include <vector> -#include "locks.h" #include "driver/compiler_driver.h" namespace art { diff --git a/compiler/utils/allocation.h b/compiler/utils/allocation.h index 07cd39788e..b0947cac68 100644 --- a/compiler/utils/allocation.h +++ b/compiler/utils/allocation.h @@ -26,7 +26,7 @@ class ArenaObject { public: // Allocate a new ArenaObject of 'size' bytes in the Arena. void* operator new(size_t size, ArenaAllocator* allocator) { - return allocator->Alloc(size, ArenaAllocator::kAllocMisc); + return allocator->Alloc(size, kArenaAllocMisc); } void operator delete(void*, size_t) { diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index 00c3c578df..ca4635d352 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -14,6 +14,9 @@ * limitations under the License. */ +#include <algorithm> +#include <numeric> + #include "arena_allocator.h" #include "base/logging.h" #include "base/mutex.h" @@ -28,7 +31,8 @@ static constexpr bool kUseMemSet = true && kUseMemMap; static constexpr size_t kValgrindRedZoneBytes = 8; constexpr size_t Arena::kDefaultSize; -static const char* alloc_names[ArenaAllocator::kNumAllocKinds] = { +template <bool kCount> +const char* ArenaAllocatorStatsImpl<kCount>::kAllocNames[kNumArenaAllocKinds] = { "Misc ", "BasicBlock ", "LIR ", @@ -42,8 +46,69 @@ static const char* alloc_names[ArenaAllocator::kNumAllocKinds] = { "RegAlloc ", "Data ", "Preds ", + "STL ", }; +template <bool kCount> +ArenaAllocatorStatsImpl<kCount>::ArenaAllocatorStatsImpl() + : num_allocations_(0u) { + std::fill_n(alloc_stats_, arraysize(alloc_stats_), 0u); +} + +template <bool kCount> +void ArenaAllocatorStatsImpl<kCount>::Copy(const ArenaAllocatorStatsImpl& other) { + num_allocations_ = other.num_allocations_; + std::copy(other.alloc_stats_, other.alloc_stats_ + arraysize(alloc_stats_), alloc_stats_); +} + +template <bool kCount> +void ArenaAllocatorStatsImpl<kCount>::RecordAlloc(size_t bytes, ArenaAllocKind kind) { + alloc_stats_[kind] += bytes; + ++num_allocations_; +} + +template <bool kCount> +size_t ArenaAllocatorStatsImpl<kCount>::NumAllocations() const { + return num_allocations_; +} + +template <bool kCount> +size_t ArenaAllocatorStatsImpl<kCount>::BytesAllocated() const { + const size_t init = 0u; // Initial value of the correct type. + return std::accumulate(alloc_stats_, alloc_stats_ + arraysize(alloc_stats_), init); +} + +template <bool kCount> +void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first, + ssize_t lost_bytes_adjustment) const { + size_t malloc_bytes = 0u; + size_t lost_bytes = 0u; + size_t num_arenas = 0u; + for (const Arena* arena = first; arena != nullptr; arena = arena->next_) { + malloc_bytes += arena->Size(); + lost_bytes += arena->RemainingSpace(); + ++num_arenas; + } + // The lost_bytes_adjustment is used to make up for the fact that the current arena + // may not have the bytes_allocated_ updated correctly. 
+ lost_bytes += lost_bytes_adjustment; + const size_t bytes_allocated = BytesAllocated(); + os << " MEM: used: " << bytes_allocated << ", allocated: " << malloc_bytes + << ", lost: " << lost_bytes << "\n"; + size_t num_allocations = NumAllocations(); + if (num_allocations != 0) { + os << "Number of arenas allocated: " << num_arenas << ", Number of allocations: " + << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n"; + } + os << "===== Allocation by kind\n"; + for (int i = 0; i < kNumArenaAllocKinds; i++) { + os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n"; + } +} + +// Explicitly instantiate the used implementation. +template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>; + Arena::Arena(size_t size) : bytes_allocated_(0), map_(nullptr), @@ -110,24 +175,26 @@ Arena* ArenaPool::AllocArena(size_t size) { return ret; } -void ArenaPool::FreeArena(Arena* arena) { - Thread* self = Thread::Current(); +void ArenaPool::FreeArenaChain(Arena* first) { if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) { - VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_); + for (Arena* arena = first; arena != nullptr; arena = arena->next_) { + VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_); + } } - { + if (first != nullptr) { + Arena* last = first; + while (last->next_ != nullptr) { + last = last->next_; + } + Thread* self = Thread::Current(); MutexLock lock(self, lock_); - arena->next_ = free_arenas_; - free_arenas_ = arena; + last->next_ = free_arenas_; + free_arenas_ = first; } } size_t ArenaAllocator::BytesAllocated() const { - size_t total = 0; - for (int i = 0; i < kNumAllocKinds; i++) { - total += alloc_stats_[i]; - } - return total; + return ArenaAllocatorStats::BytesAllocated(); } ArenaAllocator::ArenaAllocator(ArenaPool* pool) @@ -136,9 +203,7 @@ ArenaAllocator::ArenaAllocator(ArenaPool* pool) end_(nullptr), ptr_(nullptr), arena_head_(nullptr), - num_allocations_(0), running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { - memset(&alloc_stats_[0], 0, sizeof(alloc_stats_)); } void ArenaAllocator::UpdateBytesAllocated() { @@ -158,10 +223,7 @@ void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { return nullptr; } } - if (kCountAllocations) { - alloc_stats_[kind] += rounded_bytes; - ++num_allocations_; - } + ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind); uint8_t* ret = ptr_; ptr_ += rounded_bytes; // Check that the memory is already zeroed out. @@ -175,11 +237,7 @@ void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { ArenaAllocator::~ArenaAllocator() { // Reclaim all the arenas by giving them back to the thread pool. UpdateBytesAllocated(); - while (arena_head_ != nullptr) { - Arena* arena = arena_head_; - arena_head_ = arena_head_->next_; - pool_->FreeArena(arena); - } + pool_->FreeArenaChain(arena_head_); } void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) { @@ -192,30 +250,24 @@ void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) { end_ = new_arena->End(); } +MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, + ssize_t lost_bytes_adjustment) + : name_(name), + stats_(stats), + first_arena_(first_arena), + lost_bytes_adjustment_(lost_bytes_adjustment) { +} + +void MemStats::Dump(std::ostream& os) const { + os << name_ << " stats:\n"; + stats_->Dump(os, first_arena_, lost_bytes_adjustment_); +} + // Dump memory usage stats. 
-void ArenaAllocator::DumpMemStats(std::ostream& os) const { - size_t malloc_bytes = 0; - // Start out with how many lost bytes we have in the arena we are currently allocating into. - size_t lost_bytes(end_ - ptr_); - size_t num_arenas = 0; - for (Arena* arena = arena_head_; arena != nullptr; arena = arena->next_) { - malloc_bytes += arena->Size(); - if (arena != arena_head_) { - lost_bytes += arena->RemainingSpace(); - } - ++num_arenas; - } - const size_t bytes_allocated = BytesAllocated(); - os << " MEM: used: " << bytes_allocated << ", allocated: " << malloc_bytes - << ", lost: " << lost_bytes << "\n"; - if (num_allocations_ != 0) { - os << "Number of arenas allocated: " << num_arenas << ", Number of allocations: " - << num_allocations_ << ", avg size: " << bytes_allocated / num_allocations_ << "\n"; - } - os << "===== Allocation by kind\n"; - for (int i = 0; i < kNumAllocKinds; i++) { - os << alloc_names[i] << std::setw(10) << alloc_stats_[i] << "\n"; - } +MemStats ArenaAllocator::GetMemStats() const { + ssize_t lost_bytes_adjustment = + (arena_head_ == nullptr) ? 0 : (end_ - ptr_) - arena_head_->RemainingSpace(); + return MemStats("ArenaAllocator", this, arena_head_, lost_bytes_adjustment); } } // namespace art diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index 56cedfefd5..18a5bce77d 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -20,6 +20,7 @@ #include <stdint.h> #include <stddef.h> +#include "base/macros.h" #include "base/mutex.h" #include "mem_map.h" @@ -28,6 +29,72 @@ namespace art { class Arena; class ArenaPool; class ArenaAllocator; +class ArenaStack; +class ScopedArenaAllocator; +class MemStats; + +static constexpr bool kArenaAllocatorCountAllocations = false; + +// Type of allocation for memory tuning. 
+enum ArenaAllocKind { + kArenaAllocMisc, + kArenaAllocBB, + kArenaAllocLIR, + kArenaAllocMIR, + kArenaAllocDFInfo, + kArenaAllocGrowableArray, + kArenaAllocGrowableBitMap, + kArenaAllocDalvikToSSAMap, + kArenaAllocDebugInfo, + kArenaAllocSuccessor, + kArenaAllocRegAlloc, + kArenaAllocData, + kArenaAllocPredecessors, + kArenaAllocSTL, + kNumArenaAllocKinds +}; + +template <bool kCount> +class ArenaAllocatorStatsImpl; + +template <> +class ArenaAllocatorStatsImpl<false> { + public: + ArenaAllocatorStatsImpl() = default; + ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; + ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; + + void Copy(const ArenaAllocatorStatsImpl& other) { UNUSED(other); } + void RecordAlloc(size_t bytes, ArenaAllocKind kind) { UNUSED(bytes); UNUSED(kind); } + size_t NumAllocations() const { return 0u; } + size_t BytesAllocated() const { return 0u; } + void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const { + UNUSED(os); UNUSED(first); UNUSED(lost_bytes_adjustment); + } +}; + +template <bool kCount> +class ArenaAllocatorStatsImpl { + public: + ArenaAllocatorStatsImpl(); + ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; + ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; + + void Copy(const ArenaAllocatorStatsImpl& other); + void RecordAlloc(size_t bytes, ArenaAllocKind kind); + size_t NumAllocations() const; + size_t BytesAllocated() const; + void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const; + + private: + size_t num_allocations_; + // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL. + size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds. + + static const char* kAllocNames[kNumArenaAllocKinds]; +}; + +typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats; class Arena { public: @@ -59,6 +126,9 @@ class Arena { Arena* next_; friend class ArenaPool; friend class ArenaAllocator; + friend class ArenaStack; + friend class ScopedArenaAllocator; + template <bool kCount> friend class ArenaAllocatorStatsImpl; DISALLOW_COPY_AND_ASSIGN(Arena); }; @@ -67,7 +137,7 @@ class ArenaPool { ArenaPool(); ~ArenaPool(); Arena* AllocArena(size_t size); - void FreeArena(Arena* arena); + void FreeArenaChain(Arena* first); private: Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -75,28 +145,8 @@ class ArenaPool { DISALLOW_COPY_AND_ASSIGN(ArenaPool); }; -class ArenaAllocator { +class ArenaAllocator : private ArenaAllocatorStats { public: - // Type of allocation for memory tuning. 
- enum ArenaAllocKind { - kAllocMisc, - kAllocBB, - kAllocLIR, - kAllocMIR, - kAllocDFInfo, - kAllocGrowableArray, - kAllocGrowableBitMap, - kAllocDalvikToSSAMap, - kAllocDebugInfo, - kAllocSuccessor, - kAllocRegAlloc, - kAllocData, - kAllocPredecessors, - kNumAllocKinds - }; - - static constexpr bool kCountAllocations = false; - explicit ArenaAllocator(ArenaPool* pool); ~ArenaAllocator(); @@ -113,10 +163,7 @@ class ArenaAllocator { return nullptr; } } - if (kCountAllocations) { - alloc_stats_[kind] += bytes; - ++num_allocations_; - } + ArenaAllocatorStats::RecordAlloc(bytes, kind); uint8_t* ret = ptr_; ptr_ += bytes; return ret; @@ -125,7 +172,7 @@ class ArenaAllocator { void* AllocValgrind(size_t bytes, ArenaAllocKind kind); void ObtainNewArenaForAllocation(size_t allocation_size); size_t BytesAllocated() const; - void DumpMemStats(std::ostream& os) const; + MemStats GetMemStats() const; private: void UpdateBytesAllocated(); @@ -135,21 +182,22 @@ class ArenaAllocator { uint8_t* end_; uint8_t* ptr_; Arena* arena_head_; - size_t num_allocations_; - size_t alloc_stats_[kNumAllocKinds]; // Bytes used by various allocation kinds. bool running_on_valgrind_; DISALLOW_COPY_AND_ASSIGN(ArenaAllocator); }; // ArenaAllocator -struct MemStats { - public: - void Dump(std::ostream& os) const { - arena_.DumpMemStats(os); - } - explicit MemStats(const ArenaAllocator &arena) : arena_(arena) {} - private: - const ArenaAllocator &arena_; +class MemStats { + public: + MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, + ssize_t lost_bytes_adjustment = 0); + void Dump(std::ostream& os) const; + + private: + const char* const name_; + const ArenaAllocatorStats* const stats_; + const Arena* const first_arena_; + const ssize_t lost_bytes_adjustment_; }; // MemStats } // namespace art diff --git a/compiler/utils/arena_bit_vector.cc b/compiler/utils/arena_bit_vector.cc index 220ff14baa..eff9778612 100644 --- a/compiler/utils/arena_bit_vector.cc +++ b/compiler/utils/arena_bit_vector.cc @@ -25,13 +25,13 @@ class ArenaBitVectorAllocator : public Allocator { ~ArenaBitVectorAllocator() {} virtual void* Alloc(size_t size) { - return arena_->Alloc(size, ArenaAllocator::kAllocGrowableBitMap); + return arena_->Alloc(size, kArenaAllocGrowableBitMap); } virtual void Free(void*) {} // Nop. static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(sizeof(ArenaBitVectorAllocator), ArenaAllocator::kAllocGrowableBitMap); + return arena->Alloc(sizeof(ArenaBitVectorAllocator), kArenaAllocGrowableBitMap); } static void operator delete(void* p) {} // Nop. diff --git a/compiler/utils/arena_bit_vector.h b/compiler/utils/arena_bit_vector.h index 6c1461727a..1a3d6a3e34 100644 --- a/compiler/utils/arena_bit_vector.h +++ b/compiler/utils/arena_bit_vector.h @@ -55,7 +55,7 @@ class ArenaBitVector : public BitVector { ~ArenaBitVector() {} static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(sizeof(ArenaBitVector), ArenaAllocator::kAllocGrowableBitMap); + return arena->Alloc(sizeof(ArenaBitVector), kArenaAllocGrowableBitMap); } static void operator delete(void* p) {} // Nop. 
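The arena changes above are mostly mechanical at call sites: the allocation-kind tags move from the nested ArenaAllocator::kAlloc* constants to the file-level kArenaAlloc* enum, and per-kind byte counting moves into ArenaAllocatorStatsImpl<kCount>, which compiles to a no-op unless kArenaAllocatorCountAllocations is set to true. A minimal call-site sketch, illustrative only and not part of the change (arena is assumed to be an ArenaAllocator* and os a std::ostream& already in scope):

  // Before: the kind was scoped inside the allocator class.
  //   void* ptr = arena->Alloc(16, ArenaAllocator::kAllocMisc);
  // After: the kind is the free-standing ArenaAllocKind.
  void* ptr = arena->Alloc(16, kArenaAllocMisc);

  // With kArenaAllocatorCountAllocations == true, per-kind totals can be inspected:
  MemStats stats = arena->GetMemStats();
  stats.Dump(os);  // Prints used/allocated/lost bytes and the "Allocation by kind" table.
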
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 828dffafa1..dbd078a81c 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -1550,6 +1550,9 @@ void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; LoadFromOffset(kLoadWord, dst.AsCoreRegister(), base.AsArm().AsCoreRegister(), offs.Int32Value()); + if (kPoisonHeapReferences) { + rsb(dst.AsCoreRegister(), dst.AsCoreRegister(), ShifterOperand(0)); + } } void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc index cc0b509033..de5cb8cd8d 100644 --- a/compiler/utils/arm64/managed_register_arm64.cc +++ b/compiler/utils/arm64/managed_register_arm64.cc @@ -27,10 +27,10 @@ namespace arm64 { // * [W0, W15] // * [D0, D31] // * [S0, S31] -static const int kNumberOfAvailableCoreRegisters = (X15 - X0) + 1; -static const int kNumberOfAvailableWRegisters = (W15 - W0) + 1; -static const int kNumberOfAvailableDRegisters = kNumberOfDRegisters; -static const int kNumberOfAvailableSRegisters = kNumberOfSRegisters; +// static const int kNumberOfAvailableCoreRegisters = (X15 - X0) + 1; +// static const int kNumberOfAvailableWRegisters = (W15 - W0) + 1; +// static const int kNumberOfAvailableDRegisters = kNumberOfDRegisters; +// static const int kNumberOfAvailableSRegisters = kNumberOfSRegisters; // Returns true if this managed-register overlaps the other managed-register. // GP Register Bank: diff --git a/compiler/utils/debug_stack.h b/compiler/utils/debug_stack.h new file mode 100644 index 0000000000..2e02b438b9 --- /dev/null +++ b/compiler/utils/debug_stack.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_DEBUG_STACK_H_ +#define ART_COMPILER_UTILS_DEBUG_STACK_H_ + +#include "base/logging.h" +#include "base/macros.h" +#include "globals.h" + +namespace art { + +// Helper classes for reference counting to enforce construction/destruction order and +// usage of the top element of a stack in debug mode with no overhead in release mode. + +// Reference counter. No references allowed in destructor or in explicitly called CheckNoRefs(). +template <bool kIsDebug> +class DebugStackRefCounterImpl; +// Reference. Allows an explicit check that it's the top reference. +template <bool kIsDebug> +class DebugStackReferenceImpl; +// Indirect top reference. Checks that the reference is the top reference when used. +template <bool kIsDebug> +class DebugStackIndirectTopRefImpl; + +typedef DebugStackRefCounterImpl<kIsDebugBuild> DebugStackRefCounter; +typedef DebugStackReferenceImpl<kIsDebugBuild> DebugStackReference; +typedef DebugStackIndirectTopRefImpl<kIsDebugBuild> DebugStackIndirectTopRef; + +// Non-debug mode specializations. This should be optimized away. 
+ +template <> +class DebugStackRefCounterImpl<false> { + public: + size_t IncrementRefCount() { return 0u; } + void DecrementRefCount() { } + size_t GetRefCount() const { return 0u; } + void CheckNoRefs() const { } +}; + +template <> +class DebugStackReferenceImpl<false> { + public: + explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter) { UNUSED(counter); } + DebugStackReferenceImpl(const DebugStackReferenceImpl& other) = default; + DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) = default; + void CheckTop() { } +}; + +template <> +class DebugStackIndirectTopRefImpl<false> { + public: + explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref) { UNUSED(ref); } + DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) = default; + DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) = default; + void CheckTop() { } +}; + +// Debug mode versions. + +template <bool kIsDebug> +class DebugStackRefCounterImpl { + public: + DebugStackRefCounterImpl() : ref_count_(0u) { } + ~DebugStackRefCounterImpl() { CheckNoRefs(); } + size_t IncrementRefCount() { return ++ref_count_; } + void DecrementRefCount() { --ref_count_; } + size_t GetRefCount() const { return ref_count_; } + void CheckNoRefs() const { CHECK_EQ(ref_count_, 0u); } + + private: + size_t ref_count_; +}; + +template <bool kIsDebug> +class DebugStackReferenceImpl { + public: + explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<kIsDebug>* counter) + : counter_(counter), ref_count_(counter->IncrementRefCount()) { + } + DebugStackReferenceImpl(const DebugStackReferenceImpl& other) + : counter_(other.counter_), ref_count_(counter_->IncrementRefCount()) { + } + DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) { + CHECK(counter_ == other.counter_); + return *this; + } + ~DebugStackReferenceImpl() { counter_->DecrementRefCount(); } + void CheckTop() { CHECK_EQ(counter_->GetRefCount(), ref_count_); } + + private: + DebugStackRefCounterImpl<true>* counter_; + size_t ref_count_; +}; + +template <bool kIsDebug> +class DebugStackIndirectTopRefImpl { + public: + explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<kIsDebug>* ref) + : ref_(ref) { + CheckTop(); + } + DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) + : ref_(other.ref_) { + CheckTop(); + } + DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) { + CHECK(ref_ == other->ref_); + CheckTop(); + return *this; + } + ~DebugStackIndirectTopRefImpl() { + CheckTop(); + } + void CheckTop() { + ref_->CheckTop(); + } + + private: + DebugStackReferenceImpl<kIsDebug>* ref_; +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEBUG_STACK_H_ diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h index 82b6a607e7..a7d1f0e5a5 100644 --- a/compiler/utils/growable_array.h +++ b/compiler/utils/growable_array.h @@ -75,7 +75,7 @@ class GrowableArray { num_used_(0), kind_(kind) { elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, - ArenaAllocator::kAllocGrowableArray)); + kArenaAllocGrowableArray)); }; @@ -89,7 +89,7 @@ class GrowableArray { target_length = new_length; } T* new_array = static_cast<T*>(arena_->Alloc(sizeof(T) * target_length, - ArenaAllocator::kAllocGrowableArray)); + kArenaAllocGrowableArray)); memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); num_allocated_ = target_length; elem_list_ = new_array; @@ -181,7 +181,7 @@ class GrowableArray { T* 
GetRawStorage() const { return elem_list_; } static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(sizeof(GrowableArray<T>), ArenaAllocator::kAllocGrowableArray); + return arena->Alloc(sizeof(GrowableArray<T>), kArenaAllocGrowableArray); }; static void operator delete(void* p) {} // Nop. diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index fdd2bab4da..ce21b84867 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -684,6 +684,9 @@ void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, CHECK(dest.IsCoreRegister() && dest.IsCoreRegister()); LoadFromOffset(kLoadWord, dest.AsCoreRegister(), base.AsMips().AsCoreRegister(), offs.Int32Value()); + if (kPoisonHeapReferences) { + Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister()); + } } void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc new file mode 100644 index 0000000000..ee3b07ebe9 --- /dev/null +++ b/compiler/utils/scoped_arena_allocator.cc @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scoped_arena_allocator.h" + +#include "utils/arena_allocator.h" +#include <memcheck/memcheck.h> + +namespace art { + +static constexpr size_t kValgrindRedZoneBytes = 8; + +ArenaStack::ArenaStack(ArenaPool* arena_pool) + : DebugStackRefCounter(), + stats_and_pool_(arena_pool), + bottom_arena_(nullptr), + top_arena_(nullptr), + top_ptr_(nullptr), + top_end_(nullptr), + running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { +} + +ArenaStack::~ArenaStack() { + stats_and_pool_.pool->FreeArenaChain(bottom_arena_); +} + +MemStats ArenaStack::GetPeakStats() const { + DebugStackRefCounter::CheckNoRefs(); + return MemStats("ArenaStack peak", static_cast<const TaggedStats<Peak>*>(&stats_and_pool_), + bottom_arena_); +} + +uint8_t* ArenaStack::AllocateFromNextArena(size_t rounded_bytes) { + UpdateBytesAllocated(); + size_t allocation_size = std::max(Arena::kDefaultSize, rounded_bytes); + if (UNLIKELY(top_arena_ == nullptr)) { + top_arena_ = bottom_arena_ = stats_and_pool_.pool->AllocArena(allocation_size); + top_arena_->next_ = nullptr; + } else if (top_arena_->next_ != nullptr && top_arena_->next_->Size() >= allocation_size) { + top_arena_ = top_arena_->next_; + } else { + Arena* tail = top_arena_->next_; + top_arena_->next_ = stats_and_pool_.pool->AllocArena(allocation_size); + top_arena_ = top_arena_->next_; + top_arena_->next_ = tail; + } + top_end_ = top_arena_->End(); + // top_ptr_ shall be updated by ScopedArenaAllocator. 
+ return top_arena_->Begin(); +} + +void ArenaStack::UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats) { + if (PeakStats()->BytesAllocated() < CurrentStats()->BytesAllocated()) { + PeakStats()->Copy(*CurrentStats()); + } + CurrentStats()->Copy(restore_stats); +} + +void ArenaStack::UpdateBytesAllocated() { + if (top_arena_ != nullptr) { + // Update how many bytes we have allocated into the arena so that the arena pool knows how + // much memory to zero out. Though ScopedArenaAllocator doesn't guarantee the memory is + // zero-initialized, the Arena may be reused by ArenaAllocator which does guarantee this. + size_t allocated = static_cast<size_t>(top_ptr_ - top_arena_->Begin()); + if (top_arena_->bytes_allocated_ < allocated) { + top_arena_->bytes_allocated_ = allocated; + } + } +} + +void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) { + size_t rounded_bytes = (bytes + kValgrindRedZoneBytes + 3) & ~3; + uint8_t* ptr = top_ptr_; + if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { + ptr = AllocateFromNextArena(rounded_bytes); + } + CurrentStats()->RecordAlloc(bytes, kind); + top_ptr_ = ptr + rounded_bytes; + VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes); + return ptr; +} + +ScopedArenaAllocator::ScopedArenaAllocator(ArenaStack* arena_stack) + : DebugStackReference(arena_stack), + DebugStackRefCounter(), + ArenaAllocatorStats(*arena_stack->CurrentStats()), + arena_stack_(arena_stack), + mark_arena_(arena_stack->top_arena_), + mark_ptr_(arena_stack->top_ptr_), + mark_end_(arena_stack->top_end_) { +} + +ScopedArenaAllocator::~ScopedArenaAllocator() { + Reset(); +} + +void ScopedArenaAllocator::Reset() { + DebugStackReference::CheckTop(); + DebugStackRefCounter::CheckNoRefs(); + arena_stack_->UpdatePeakStatsAndRestore(*this); + arena_stack_->UpdateBytesAllocated(); + if (LIKELY(mark_arena_ != nullptr)) { + arena_stack_->top_arena_ = mark_arena_; + arena_stack_->top_ptr_ = mark_ptr_; + arena_stack_->top_end_ = mark_end_; + } else if (arena_stack_->bottom_arena_ != nullptr) { + mark_arena_ = arena_stack_->top_arena_ = arena_stack_->bottom_arena_; + mark_ptr_ = arena_stack_->top_ptr_ = mark_arena_->Begin(); + mark_end_ = arena_stack_->top_end_ = mark_arena_->End(); + } +} + +} // namespace art diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h new file mode 100644 index 0000000000..24a8afea6e --- /dev/null +++ b/compiler/utils/scoped_arena_allocator.h @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ +#define ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ + +#include "base/logging.h" +#include "base/macros.h" +#include "utils/arena_allocator.h" +#include "utils/debug_stack.h" +#include "globals.h" + +namespace art { + +class ArenaStack; +class ScopedArenaAllocator; + +template <typename T> +class ScopedArenaAllocatorAdapter; + +// Holds a list of Arenas for use by ScopedArenaAllocator stack. +class ArenaStack : private DebugStackRefCounter { + public: + explicit ArenaStack(ArenaPool* arena_pool); + ~ArenaStack(); + + size_t PeakBytesAllocated() { + return PeakStats()->BytesAllocated(); + } + + MemStats GetPeakStats() const; + + private: + struct Peak; + struct Current; + template <typename Tag> struct TaggedStats : ArenaAllocatorStats { }; + struct StatsAndPool : TaggedStats<Peak>, TaggedStats<Current> { + explicit StatsAndPool(ArenaPool* arena_pool) : pool(arena_pool) { } + ArenaPool* const pool; + }; + + ArenaAllocatorStats* PeakStats() { + return static_cast<TaggedStats<Peak>*>(&stats_and_pool_); + } + + ArenaAllocatorStats* CurrentStats() { + return static_cast<TaggedStats<Current>*>(&stats_and_pool_); + } + + // Private - access via ScopedArenaAllocator or ScopedArenaAllocatorAdapter. + void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { + if (UNLIKELY(running_on_valgrind_)) { + return AllocValgrind(bytes, kind); + } + size_t rounded_bytes = (bytes + 3) & ~3; + uint8_t* ptr = top_ptr_; + if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { + ptr = AllocateFromNextArena(rounded_bytes); + } + CurrentStats()->RecordAlloc(bytes, kind); + top_ptr_ = ptr + rounded_bytes; + return ptr; + } + + uint8_t* AllocateFromNextArena(size_t rounded_bytes); + void UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats); + void UpdateBytesAllocated(); + void* AllocValgrind(size_t bytes, ArenaAllocKind kind); + + StatsAndPool stats_and_pool_; + Arena* bottom_arena_; + Arena* top_arena_; + uint8_t* top_ptr_; + uint8_t* top_end_; + + const bool running_on_valgrind_; + + friend class ScopedArenaAllocator; + template <typename T> + friend class ScopedArenaAllocatorAdapter; + + DISALLOW_COPY_AND_ASSIGN(ArenaStack); +}; + +class ScopedArenaAllocator + : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats { + public: + // Create a ScopedArenaAllocator directly on the ArenaStack when the scope of + // the allocator is not exactly a C++ block scope. For example, an optimization + // pass can create the scoped allocator in Start() and destroy it in End(). + static ScopedArenaAllocator* Create(ArenaStack* arena_stack) { + void* addr = arena_stack->Alloc(sizeof(ScopedArenaAllocator), kArenaAllocMisc); + ScopedArenaAllocator* allocator = new(addr) ScopedArenaAllocator(arena_stack); + allocator->mark_ptr_ = reinterpret_cast<uint8_t*>(addr); + return allocator; + } + + explicit ScopedArenaAllocator(ArenaStack* arena_stack); + ~ScopedArenaAllocator(); + + void Reset(); + + void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { + DebugStackReference::CheckTop(); + return arena_stack_->Alloc(bytes, kind); + } + + // ScopedArenaAllocatorAdapter is incomplete here, we need to define this later. + ScopedArenaAllocatorAdapter<void> Adapter(); + + // Allow a delete-expression to destroy but not deallocate allocators created by Create(). 
+ static void operator delete(void* ptr) { UNUSED(ptr); } + + private: + ArenaStack* const arena_stack_; + Arena* mark_arena_; + uint8_t* mark_ptr_; + uint8_t* mark_end_; + + template <typename T> + friend class ScopedArenaAllocatorAdapter; + + DISALLOW_COPY_AND_ASSIGN(ScopedArenaAllocator); +}; + +template <> +class ScopedArenaAllocatorAdapter<void> + : private DebugStackReference, private DebugStackIndirectTopRef { + public: + typedef void value_type; + typedef void* pointer; + typedef const void* const_pointer; + + template <typename U> + struct rebind { + typedef ScopedArenaAllocatorAdapter<U> other; + }; + + explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator) + : DebugStackReference(arena_allocator), + DebugStackIndirectTopRef(arena_allocator), + arena_stack_(arena_allocator->arena_stack_) { + } + template <typename U> + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + DebugStackIndirectTopRef(other), + arena_stack_(other.arena_stack_) { + } + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; + ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; + ~ScopedArenaAllocatorAdapter() = default; + + private: + ArenaStack* arena_stack_; + + template <typename U> + friend class ScopedArenaAllocatorAdapter; +}; + +// Adapter for use of ScopedArenaAllocator in STL containers. +template <typename T> +class ScopedArenaAllocatorAdapter : private DebugStackReference, private DebugStackIndirectTopRef { + public: + typedef T value_type; + typedef T* pointer; + typedef T& reference; + typedef const T* const_pointer; + typedef const T& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + template <typename U> + struct rebind { + typedef ScopedArenaAllocatorAdapter<U> other; + }; + + explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator) + : DebugStackReference(arena_allocator), + DebugStackIndirectTopRef(arena_allocator), + arena_stack_(arena_allocator->arena_stack_) { + } + template <typename U> + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + DebugStackIndirectTopRef(other), + arena_stack_(other.arena_stack_) { + } + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; + ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; + ~ScopedArenaAllocatorAdapter() = default; + + size_type max_size() const { + return static_cast<size_type>(-1) / sizeof(T); + } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) { + DCHECK_LE(n, max_size()); + DebugStackIndirectTopRef::CheckTop(); + return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T), kArenaAllocSTL)); + } + void deallocate(pointer p, size_type n) { + DebugStackIndirectTopRef::CheckTop(); + } + + void construct(pointer p, const_reference val) { + DebugStackIndirectTopRef::CheckTop(); + new (static_cast<void*>(p)) value_type(val); + } + void destroy(pointer p) { + DebugStackIndirectTopRef::CheckTop(); + p->~value_type(); + } + + private: + ArenaStack* arena_stack_; + + template <typename U> + friend class ScopedArenaAllocatorAdapter; +}; + +inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter() { + return ScopedArenaAllocatorAdapter<void>(this); +} + 
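// Usage sketch, illustrative only and not part of this change: ties together ArenaStack,
// ScopedArenaAllocator and the adapter declared above. Assumes an ArenaPool* named pool and
// that <vector> is available; in debug builds the DebugStack* bases CHECK that scoped
// allocators are destroyed in LIFO order and that only the top one is used.
static void ScopedArenaUsageSketch(ArenaPool* pool) {
  ArenaStack arena_stack(pool);
  {
    ScopedArenaAllocator allocator(&arena_stack);
    void* raw = allocator.Alloc(128, kArenaAllocMisc);  // Raw bytes from the top of the arena stack.
    UNUSED(raw);
    // STL container backed by the scoped arena; its allocations are tagged kArenaAllocSTL.
    std::vector<int, ScopedArenaAllocatorAdapter<int> > ints(allocator.Adapter());
    ints.push_back(42);
  }  // ~ScopedArenaAllocator() rewinds the arena stack; no per-object deallocation is needed.
  MemStats peak = arena_stack.GetPeakStats();  // Peak usage across all scopes on this stack.
  UNUSED(peak);
}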
+} // namespace art + +#endif // ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 26300e02e0..db8956d43b 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1560,6 +1560,9 @@ void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, X86ManagedRegister dest = mdest.AsX86(); CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); + if (kPoisonHeapReferences) { + negl(dest.AsCpuRegister()); + } } void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 67a9e060d9..7c81ffb16e 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -54,7 +54,6 @@ #include "runtime.h" #include "ScopedLocalRef.h" #include "scoped_thread_state_change.h" -#include "sirt_ref.h" #include "vector_output_stream.h" #include "well_known_classes.h" #include "zip_archive.h" diff --git a/runtime/Android.mk b/runtime/Android.mk index bb1bc990ee..18e2d3e630 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -84,7 +84,6 @@ LIBART_COMMON_SRC_FILES := \ jdwp/object_registry.cc \ jni_internal.cc \ jobject_comparator.cc \ - locks.cc \ mem_map.cc \ memory_region.cc \ mirror/art_field.cc \ @@ -289,7 +288,6 @@ LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \ invoke_type.h \ jdwp/jdwp.h \ jdwp/jdwp_constants.h \ - locks.h \ lock_word.h \ mirror/class.h \ oat.h \ @@ -425,14 +423,8 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif endef -ifeq ($(ART_BUILD_TARGET_NDEBUG),true) - $(eval $(call build-libart,target,ndebug,$(ART_TARGET_CLANG))) -endif -ifeq ($(ART_BUILD_TARGET_DEBUG),true) - $(eval $(call build-libart,target,debug,$(ART_TARGET_CLANG))) -endif - -# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target. +# We always build dex2oat and dependencies, even if the host build is otherwise disabled, since +# they are used to cross compile for the target. 
ifeq ($(WITH_HOST_DALVIK),true) ifeq ($(ART_BUILD_NDEBUG),true) $(eval $(call build-libart,host,ndebug,$(ART_HOST_CLANG))) @@ -441,3 +433,10 @@ ifeq ($(WITH_HOST_DALVIK),true) $(eval $(call build-libart,host,debug,$(ART_HOST_CLANG))) endif endif + +ifeq ($(ART_BUILD_TARGET_NDEBUG),true) + $(eval $(call build-libart,target,ndebug,$(ART_TARGET_CLANG))) +endif +ifeq ($(ART_BUILD_TARGET_DEBUG),true) + $(eval $(call build-libart,target,debug,$(ART_TARGET_CLANG))) +endif diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 4a0d08292c..2ccce8dcaf 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -17,7 +17,6 @@ #ifndef ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_ #define ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_ -#include "locks.h" #include "arch/context.h" #include "base/logging.h" #include "registers_arm.h" diff --git a/runtime/arch/context.h b/runtime/arch/context.h index 83bbb11fd5..f7b7835466 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -20,7 +20,7 @@ #include <stddef.h> #include <stdint.h> -#include "locks.h" +#include "base/mutex.h" namespace art { diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index b59c0cbe50..14975dadd4 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -19,38 +19,25 @@ #include "asm_support_x86_64.h" -#if defined(__APPLE__) - // Mac OS' as(1) doesn't let you name macro parameters. +#if defined(__clang__) + // Clang's as(1) doesn't let you name macro parameters. #define MACRO0(macro_name) .macro macro_name #define MACRO1(macro_name, macro_arg1) .macro macro_name #define MACRO2(macro_name, macro_arg1, macro_args2) .macro macro_name #define MACRO3(macro_name, macro_arg1, macro_args2, macro_args3) .macro macro_name #define END_MACRO .endmacro - // Mac OS' as(1) uses $0, $1, and so on for macro arguments, and function names - // are mangled with an extra underscore prefix. The use of $x for arguments - // mean that literals need to be represented with $$x in macros. - #define SYMBOL(name) _ ## name - #define PLT_SYMBOL(name) _ ## name + // Clang's as(1) uses $0, $1, and so on for macro arguments. #define VAR(name,index) SYMBOL($index) #define PLT_VAR(name, index) SYMBOL($index) #define REG_VAR(name,index) %$index #define CALL_MACRO(name,index) $index + #define FUNCTION_TYPE(name,index) .type $index, @function + #define SIZE(name,index) .size $index, .-$index + + // The use of $x for arguments mean that literals need to be represented with $$x in macros. #define LITERAL(value) $value #define MACRO_LITERAL(value) $$value - - // Mac OS' doesn't like cfi_* directives - #define CFI_STARTPROC - #define CFI_ENDPROC - #define CFI_ADJUST_CFA_OFFSET(size) - #define CFI_DEF_CFA(reg,size) - #define CFI_DEF_CFA_REGISTER(reg) - #define CFI_RESTORE(reg) - #define CFI_REL_OFFSET(reg,size) - - // Mac OS' doesn't support certain directives - #define FUNCTION_TYPE(name) - #define SIZE(name) #else // Regular gas(1) lets you name macro parameters. #define MACRO0(macro_name) .macro macro_name @@ -65,16 +52,19 @@ // no special meaning to $, so literals are still just $x. The use of altmacro means % is a // special character meaning care needs to be taken when passing registers as macro arguments. 
.altmacro - #define SYMBOL(name) name - #define PLT_SYMBOL(name) name@PLT #define VAR(name,index) name& #define PLT_VAR(name, index) name&@PLT #define REG_VAR(name,index) %name #define CALL_MACRO(name,index) name& + #define FUNCTION_TYPE(name,index) .type name&, @function + #define SIZE(name,index) .size name, .-name + #define LITERAL(value) $value #define MACRO_LITERAL(value) $value +#endif - // CFI support + // CFI support. +#if !defined(__APPLE__) #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_ADJUST_CFA_OFFSET(size) .cfi_adjust_cfa_offset size @@ -82,9 +72,25 @@ #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg #define CFI_RESTORE(reg) .cfi_restore reg #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size +#else + // Mac OS' doesn't like cfi_* directives. + #define CFI_STARTPROC + #define CFI_ENDPROC + #define CFI_ADJUST_CFA_OFFSET(size) + #define CFI_DEF_CFA(reg,size) + #define CFI_DEF_CFA_REGISTER(reg) + #define CFI_RESTORE(reg) + #define CFI_REL_OFFSET(reg,size) +#endif - #define FUNCTION_TYPE(name) .type name&, @function - #define SIZE(name) .size name, .-name + // Symbols. +#if !defined(__APPLE__) + #define SYMBOL(name) name + #define PLT_SYMBOL(name) name ## @PLT +#else + // Mac OS' symbols have an _ prefix. + #define SYMBOL(name) _ ## name + #define PLT_SYMBOL(name) _ ## name #endif /* Cache alignment for function entry */ @@ -93,7 +99,7 @@ MACRO0(ALIGN_FUNCTION_ENTRY) END_MACRO MACRO1(DEFINE_FUNCTION, c_name) - FUNCTION_TYPE(\c_name) + FUNCTION_TYPE(\c_name, 0) .globl VAR(c_name, 0) ALIGN_FUNCTION_ENTRY VAR(c_name, 0): @@ -102,7 +108,7 @@ END_MACRO MACRO1(END_FUNCTION, c_name) CFI_ENDPROC - SIZE(\c_name) + SIZE(\c_name, 0) END_MACRO MACRO1(PUSH, reg) @@ -118,7 +124,7 @@ MACRO1(POP, reg) END_MACRO MACRO1(UNIMPLEMENTED,name) - FUNCTION_TYPE(\name) + FUNCTION_TYPE(\name, 0) .globl VAR(name, 0) ALIGN_FUNCTION_ENTRY VAR(name, 0): @@ -126,21 +132,7 @@ VAR(name, 0): int3 int3 CFI_ENDPROC - SIZE(\name) -END_MACRO - -MACRO0(SETUP_GOT_NOSAVE) - call __x86.get_pc_thunk.bx - addl $_GLOBAL_OFFSET_TABLE_, %ebx -END_MACRO - -MACRO0(SETUP_GOT) - PUSH ebx - SETUP_GOT_NOSAVE -END_MACRO - -MACRO0(UNDO_SETUP_GOT) - POP ebx + SIZE(\name, 0) END_MACRO #endif // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_ diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 863fa318cd..a78a1e5676 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -33,7 +33,7 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. - subq LITERAL(8), %rsp // Space for Method* (also aligns the frame). + subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). CFI_ADJUST_CFA_OFFSET(8) // R10 := ArtMethod* for ref and args callee save frame method. movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 @@ -76,7 +76,7 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq LITERAL(80), %rsp + subq MACRO_LITERAL(80), %rsp CFI_ADJUST_CFA_OFFSET(80) // R10 := ArtMethod* for ref and args callee save frame method. 
movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 @@ -103,7 +103,7 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME) movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 - addq LITERAL(80), %rsp + addq MACRO_LITERAL(80), %rsp CFI_ADJUST_CFA_OFFSET(-80) // Restore callee and GPR args, mixed together to agree with core spills bitmap. POP rcx @@ -226,26 +226,26 @@ INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvo MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished) 1: // LOOP movb (%r10), %al // al := *shorty - addq LITERAL(1), %r10 // shorty++ - cmpb LITERAL(0), %al // if (al == '\0') goto xmm_setup_finished + addq MACRO_LITERAL(1), %r10 // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto xmm_setup_finished je VAR(finished, 1) - cmpb LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE je 2f - cmpb LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT je 3f - addq LITERAL(4), %r11 // arg_array++ + addq MACRO_LITERAL(4), %r11 // arg_array++ // Handle extra space in arg array taken by a long. - cmpb LITERAL(74), %al // if (al != 'J') goto LOOP + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP jne 1b - addq LITERAL(4), %r11 // arg_array++ + addq MACRO_LITERAL(4), %r11 // arg_array++ jmp 1b // goto LOOP 2: // FOUND_DOUBLE movsd (%r11), REG_VAR(xmm_reg, 0) - addq LITERAL(8), %r11 // arg_array+=2 + addq MACRO_LITERAL(8), %r11 // arg_array+=2 jmp 4f 3: // FOUND_FLOAT movss (%r11), REG_VAR(xmm_reg, 0) - addq LITERAL(4), %r11 // arg_array++ + addq MACRO_LITERAL(4), %r11 // arg_array++ 4: END_MACRO @@ -257,27 +257,27 @@ END_MACRO MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished) 1: // LOOP movb (%r10), %al // al := *shorty - addq LITERAL(1), %r10 // shorty++ - cmpb LITERAL(0), %al // if (al == '\0') goto gpr_setup_finished + addq MACRO_LITERAL(1), %r10 // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto gpr_setup_finished je VAR(finished, 2) - cmpb LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG je 2f - cmpb LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT je 3f - cmpb LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE je 4f movl (%r11), REG_VAR(gpr_reg32, 1) - addq LITERAL(4), %r11 // arg_array++ + addq MACRO_LITERAL(4), %r11 // arg_array++ jmp 5f 2: // FOUND_LONG movq (%r11), REG_VAR(gpr_reg64, 0) - addq LITERAL(8), %r11 // arg_array+=2 + addq MACRO_LITERAL(8), %r11 // arg_array+=2 jmp 5f 3: // SKIP_FLOAT - addq LITERAL(4), %r11 // arg_array++ + addq MACRO_LITERAL(4), %r11 // arg_array++ jmp 1b 4: // SKIP_DOUBLE - addq LITERAL(8), %r11 // arg_array+=2 + addq MACRO_LITERAL(8), %r11 // arg_array+=2 jmp 1b 5: END_MACRO @@ -766,7 +766,11 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline // 16-byte aligned: 4336 // Note: 14x8 = 7*16, so the stack stays aligned for the native call... 
// Also means: the padding is somewhere in the middle - subq LITERAL(4336), %rsp + // + // + // New test: use 5K and release + // 5k = 5120 + subq LITERAL(5120), %rsp // prepare for artQuickGenericJniTrampoline call // (Thread*, SP) // rdi rsi <= C calling convention @@ -774,9 +778,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %gs:THREAD_SELF_OFFSET, %rdi movq %rbp, %rsi call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp) - test %rax, %rax // check whether code pointer is NULL, also indicates exception - jz 1f - // pop from the register-passing alloca + test %rax, %rax // check whether error (negative value) + js 1f + // release part of the alloca + addq %rax, %rsp + // get the code pointer + popq %rax + // pop from the register-passing alloca region // what's the right layout? popq %rdi popq %rsi diff --git a/runtime/barrier.h b/runtime/barrier.h index e335c327be..0c7fd87a79 100644 --- a/runtime/barrier.h +++ b/runtime/barrier.h @@ -18,7 +18,6 @@ #define ART_RUNTIME_BARRIER_H_ #include "base/mutex.h" -#include "locks.h" #include "UniquePtr.h" namespace art { diff --git a/runtime/base/logging.h b/runtime/base/logging.h index 075d571197..0fcec1f277 100644 --- a/runtime/base/logging.h +++ b/runtime/base/logging.h @@ -192,7 +192,7 @@ class LogMessage { : data_(new LogMessageData(file, line, severity, error)) { } - ~LogMessage() LOCKS_EXCLUDED(Locks::logging_lock_); + ~LogMessage(); // TODO: enable LOCKS_EXCLUDED(Locks::logging_lock_). std::ostream& stream() { return data_->buffer; @@ -235,32 +235,6 @@ std::ostream& operator<<(std::ostream& os, const Dumpable<T>& rhs) { return os; } -template<typename T> -class MutatorLockedDumpable { - public: - explicit MutatorLockedDumpable(T& value) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : value_(value) { - } - - void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - value_.Dump(os); - } - - private: - T& value_; - - DISALLOW_COPY_AND_ASSIGN(MutatorLockedDumpable); -}; - -template<typename T> -std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs) -// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) however annotalysis -// currently fails for this. - NO_THREAD_SAFETY_ANALYSIS { - rhs.Dump(os); - return os; -} - // Helps you use operator<< in a const char*-like context such as our various 'F' methods with // format strings. template<typename T> diff --git a/runtime/base/macros.h b/runtime/base/macros.h index 6cc9396bf2..b193ff18d4 100644 --- a/runtime/base/macros.h +++ b/runtime/base/macros.h @@ -178,48 +178,40 @@ char (&ArraySizeHelper(T (&array)[N]))[N]; template<typename T> void UNUSED(const T&) {} -#if defined(__SUPPORT_TS_ANNOTATION__) - -#define ACQUIRED_AFTER(...) __attribute__ ((acquired_after(__VA_ARGS__))) -#define ACQUIRED_BEFORE(...) __attribute__ ((acquired_before(__VA_ARGS__))) -#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__ ((exclusive_lock(__VA_ARGS__))) -#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__ ((exclusive_locks_required(__VA_ARGS__))) -#define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__ ((exclusive_trylock(__VA_ARGS__))) -#define GUARDED_BY(x) __attribute__ ((guarded_by(x))) -#define GUARDED_VAR __attribute__ ((guarded)) -#define LOCKABLE __attribute__ ((lockable)) -#define LOCK_RETURNED(x) __attribute__ ((lock_returned(x))) -#define LOCKS_EXCLUDED(...) 
__attribute__ ((locks_excluded(__VA_ARGS__))) -#define NO_THREAD_SAFETY_ANALYSIS __attribute__ ((no_thread_safety_analysis)) -#define PT_GUARDED_BY(x) __attribute__ ((point_to_guarded_by(x))) -#define PT_GUARDED_VAR __attribute__ ((point_to_guarded)) -#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) -#define SHARED_LOCK_FUNCTION(...) __attribute__ ((shared_lock(__VA_ARGS__))) -#define SHARED_LOCKS_REQUIRED(...) __attribute__ ((shared_locks_required(__VA_ARGS__))) -#define SHARED_TRYLOCK_FUNCTION(...) __attribute__ ((shared_trylock(__VA_ARGS__))) -#define UNLOCK_FUNCTION(...) __attribute__ ((unlock(__VA_ARGS__))) - +// Annotalysis thread-safety analysis support. +#if defined(__SUPPORT_TS_ANNOTATION__) || defined(__clang__) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) #else +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#endif -#define ACQUIRED_AFTER(...) -#define ACQUIRED_BEFORE(...) -#define EXCLUSIVE_LOCK_FUNCTION(...) -#define EXCLUSIVE_LOCKS_REQUIRED(...) -#define EXCLUSIVE_TRYLOCK_FUNCTION(...) -#define GUARDED_BY(x) -#define GUARDED_VAR -#define LOCKABLE -#define LOCK_RETURNED(x) -#define LOCKS_EXCLUDED(...) -#define NO_THREAD_SAFETY_ANALYSIS +#define ACQUIRED_AFTER(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) +#define ACQUIRED_BEFORE(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) +#define EXCLUSIVE_LOCKS_REQUIRED(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__)) +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) +#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) +#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) +#define LOCKS_EXCLUDED(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) +#define NO_THREAD_SAFETY_ANALYSIS THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) #define PT_GUARDED_BY(x) -#define PT_GUARDED_VAR -#define SCOPED_LOCKABLE -#define SHARED_LOCK_FUNCTION(...) -#define SHARED_LOCKS_REQUIRED(...) -#define SHARED_TRYLOCK_FUNCTION(...) -#define UNLOCK_FUNCTION(...) - -#endif // defined(__SUPPORT_TS_ANNOTATION__) +// THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) +#define PT_GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) +#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) +#define SHARED_LOCKS_REQUIRED(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__)) + +#if defined(__clang__) +#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__)) +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__)) +#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__)) +#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__)) +#define UNLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__)) +#else +#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(__VA_ARGS__)) +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(__VA_ARGS__)) +#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(__VA_ARGS__)) +#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(__VA_ARGS__)) +#define UNLOCK_FUNCTION(...) 
THREAD_ANNOTATION_ATTRIBUTE__(unlock(__VA_ARGS__)) +#endif #endif // ART_RUNTIME_BASE_MACROS_H_ diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index ff72d16908..fdf5763c3f 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -29,6 +29,30 @@ namespace art { +Mutex* Locks::abort_lock_ = nullptr; +Mutex* Locks::breakpoint_lock_ = nullptr; +Mutex* Locks::deoptimization_lock_ = nullptr; +ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr; +ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr; +Mutex* Locks::logging_lock_ = nullptr; +ReaderWriterMutex* Locks::mutator_lock_ = nullptr; +Mutex* Locks::runtime_shutdown_lock_ = nullptr; +Mutex* Locks::thread_list_lock_ = nullptr; +Mutex* Locks::thread_suspend_count_lock_ = nullptr; +Mutex* Locks::trace_lock_ = nullptr; +Mutex* Locks::profiler_lock_ = nullptr; +Mutex* Locks::unexpected_signal_lock_ = nullptr; +Mutex* Locks::intern_table_lock_ = nullptr; + +struct AllMutexData { + // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait). + Atomic<const BaseMutex*> all_mutexes_guard; + // All created mutexes guarded by all_mutexes_guard_. + std::set<BaseMutex*>* all_mutexes; + AllMutexData() : all_mutexes(NULL) {} +}; +static struct AllMutexData gAllMutexData[kAllMutexDataSize]; + #if ART_USE_FUTEXES static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) { const int32_t one_sec = 1000 * 1000 * 1000; // one second in nanoseconds. @@ -45,15 +69,6 @@ static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, co } #endif -struct AllMutexData { - // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait). - Atomic<const BaseMutex*> all_mutexes_guard; - // All created mutexes guarded by all_mutexes_guard_. - std::set<BaseMutex*>* all_mutexes; - AllMutexData() : all_mutexes(NULL) {} -}; -static struct AllMutexData gAllMutexData[kAllMutexDataSize]; - class ScopedAllMutexesLock { public: explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) { @@ -792,4 +807,53 @@ void ConditionVariable::TimedWait(Thread* self, int64_t ms, int32_t ns) { guard_.recursion_count_ = old_recursion_count; } +void Locks::Init() { + if (logging_lock_ != nullptr) { + // Already initialized. 
+ DCHECK(abort_lock_ != nullptr); + DCHECK(breakpoint_lock_ != nullptr); + DCHECK(deoptimization_lock_ != nullptr); + DCHECK(classlinker_classes_lock_ != nullptr); + DCHECK(heap_bitmap_lock_ != nullptr); + DCHECK(logging_lock_ != nullptr); + DCHECK(mutator_lock_ != nullptr); + DCHECK(thread_list_lock_ != nullptr); + DCHECK(thread_suspend_count_lock_ != nullptr); + DCHECK(trace_lock_ != nullptr); + DCHECK(profiler_lock_ != nullptr); + DCHECK(unexpected_signal_lock_ != nullptr); + DCHECK(intern_table_lock_ != nullptr); + } else { + logging_lock_ = new Mutex("logging lock", kLoggingLock, true); + abort_lock_ = new Mutex("abort lock", kAbortLock, true); + + DCHECK(breakpoint_lock_ == nullptr); + breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock); + DCHECK(deoptimization_lock_ == nullptr); + deoptimization_lock_ = new Mutex("deoptimization lock", kDeoptimizationLock); + DCHECK(classlinker_classes_lock_ == nullptr); + classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock", + kClassLinkerClassesLock); + DCHECK(heap_bitmap_lock_ == nullptr); + heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock); + DCHECK(mutator_lock_ == nullptr); + mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock); + DCHECK(runtime_shutdown_lock_ == nullptr); + runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock); + DCHECK(thread_list_lock_ == nullptr); + thread_list_lock_ = new Mutex("thread list lock", kThreadListLock); + DCHECK(thread_suspend_count_lock_ == nullptr); + thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock); + DCHECK(trace_lock_ == nullptr); + trace_lock_ = new Mutex("trace lock", kTraceLock); + DCHECK(profiler_lock_ == nullptr); + profiler_lock_ = new Mutex("profiler lock", kProfilerLock); + DCHECK(unexpected_signal_lock_ == nullptr); + unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true); + DCHECK(intern_table_lock_ == nullptr); + intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock); + } +} + + } // namespace art diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 63ed6cbe2f..55ec1c3839 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -27,7 +27,6 @@ #include "base/logging.h" #include "base/macros.h" #include "globals.h" -#include "locks.h" #if defined(__APPLE__) #define ART_USE_FUTEXES 0 @@ -44,9 +43,56 @@ namespace art { +class LOCKABLE ReaderWriterMutex; class ScopedContentionRecorder; class Thread; +// LockLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or +// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle free +// partial ordering and thereby cause deadlock situations to fail checks. 
+// +// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163 +enum LockLevel { + kLoggingLock = 0, + kUnexpectedSignalLock, + kThreadSuspendCountLock, + kAbortLock, + kJdwpSocketLock, + kRosAllocGlobalLock, + kRosAllocBracketLock, + kRosAllocBulkFreeLock, + kAllocSpaceLock, + kDexFileMethodInlinerLock, + kDexFileToMethodInlinerMapLock, + kMarkSweepMarkStackLock, + kTransactionLogLock, + kInternTableLock, + kMonitorPoolLock, + kDefaultMutexLevel, + kMarkSweepLargeObjectLock, + kPinTableLock, + kLoadLibraryLock, + kJdwpObjectRegistryLock, + kClassLinkerClassesLock, + kBreakpointLock, + kMonitorLock, + kThreadListLock, + kBreakpointInvokeLock, + kDeoptimizationLock, + kTraceLock, + kProfilerLock, + kJdwpEventListLock, + kJdwpAttachLock, + kJdwpStartLock, + kRuntimeShutdownLock, + kHeapBitmapLock, + kMutatorLock, + kZygoteCreationLock, + + kLockLevelCount // Must come last. +}; +std::ostream& operator<<(std::ostream& os, const LockLevel& rhs); + const bool kDebugLocking = kIsDebugBuild; // Record Log contention information, dumpable via SIGQUIT. @@ -413,6 +459,117 @@ class SCOPED_LOCKABLE WriterMutexLock { // "WriterMutexLock mu(lock)". #define WriterMutexLock(x) COMPILE_ASSERT(0, writer_mutex_lock_declaration_missing_variable_name) +// Global mutexes corresponding to the levels above. +class Locks { + public: + static void Init(); + + // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block + // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds + // a share on the mutator_lock_. The garbage collector may also execute with shared access but + // at times requires exclusive access to the heap (not to be confused with the heap meta-data + // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks + // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_ + // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition + // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on + // the mutator lock doesn't necessarily allow the exclusive user (e.g the garbage collector) + // chance to acquire the lock. + // + // Thread suspension: + // Shared users | Exclusive user + // (holding mutator lock and in kRunnable state) | .. running .. + // .. running .. | Request thread suspension by: + // .. running .. | - acquiring thread_suspend_count_lock_ + // .. running .. | - incrementing Thread::suspend_count_ on + // .. running .. | all mutator threads + // .. running .. | - releasing thread_suspend_count_lock_ + // .. running .. | Block trying to acquire exclusive mutator lock + // Poll Thread::suspend_count_ and enter full | .. blocked .. + // suspend code. | .. blocked .. + // Change state to kSuspended | .. blocked .. + // x: Release share on mutator_lock_ | Carry out exclusive access + // Acquire thread_suspend_count_lock_ | .. exclusive .. + // while Thread::suspend_count_ > 0 | .. exclusive .. + // - wait on Thread::resume_cond_ | .. exclusive .. + // (releases thread_suspend_count_lock_) | .. exclusive .. + // .. waiting .. | Release mutator_lock_ + // .. waiting .. | Request thread resumption by: + // .. waiting .. | - acquiring thread_suspend_count_lock_ + // .. waiting .. | - decrementing Thread::suspend_count_ on + // .. waiting .. | all mutator threads + // .. waiting .. 
| - notifying on Thread::resume_cond_ + // - re-acquire thread_suspend_count_lock_ | - releasing thread_suspend_count_lock_ + // Release thread_suspend_count_lock_ | .. running .. + // Acquire share on mutator_lock_ | .. running .. + // - This could block but the thread still | .. running .. + // has a state of kSuspended and so this | .. running .. + // isn't an issue. | .. running .. + // Acquire thread_suspend_count_lock_ | .. running .. + // - we poll here as we're transitioning into | .. running .. + // kRunnable and an individual thread suspend | .. running .. + // request (e.g for debugging) won't try | .. running .. + // to acquire the mutator lock (which would | .. running .. + // block as we hold the mutator lock). This | .. running .. + // poll ensures that if the suspender thought | .. running .. + // we were suspended by incrementing our | .. running .. + // Thread::suspend_count_ and then reading | .. running .. + // our state we go back to waiting on | .. running .. + // Thread::resume_cond_. | .. running .. + // can_go_runnable = Thread::suspend_count_ == 0 | .. running .. + // Release thread_suspend_count_lock_ | .. running .. + // if can_go_runnable | .. running .. + // Change state to kRunnable | .. running .. + // else | .. running .. + // Goto x | .. running .. + // .. running .. | .. running .. + static ReaderWriterMutex* mutator_lock_; + + // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap. + static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_); + + // Guards shutdown of the runtime. + static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_); + + // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads + // attaching and detaching. + static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_); + + // Guards breakpoints. + static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_); + + // Guards deoptimization requests. + static Mutex* deoptimization_lock_ ACQUIRED_AFTER(breakpoint_lock_); + + // Guards trace requests. + static Mutex* trace_lock_ ACQUIRED_AFTER(deoptimization_lock_); + + // Guards profile objects. + static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_); + + // Guards lists of classes within the class linker. + static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_); + + // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code + // doesn't try to hold a higher level Mutex. + #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_) + + // Guards intern table. + static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); + + // Have an exclusive aborting thread. + static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); + + // Allow mutual exclusion when manipulating Thread::suspend_count_. + // TODO: Does the trade-off of a per-thread lock make sense? + static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_); + + // One unexpected signal at a time lock. + static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_); + + // Have an exclusive logging thread. 
+ static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_); +}; + } // namespace art #endif // ART_RUNTIME_BASE_MUTEX_H_ diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h index 754d1dd8c2..6c53563a3b 100644 --- a/runtime/class_linker-inl.h +++ b/runtime/class_linker-inl.h @@ -24,7 +24,7 @@ #include "mirror/iftable.h" #include "mirror/object_array.h" #include "object_utils.h" -#include "sirt_ref.h" +#include "sirt_ref-inl.h" namespace art { diff --git a/runtime/class_linker.h b/runtime/class_linker.h index aad7cfc875..701e62e57a 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -260,7 +260,7 @@ class ClassLinker { bool GenerateOatFile(const char* dex_filename, int oat_fd, const char* oat_cache_filename, - std::string* error_msg); + std::string* error_msg) LOCKS_EXCLUDED(Locks::mutator_lock_); const OatFile* FindOatFileFromOatLocation(const std::string& location, @@ -519,7 +519,7 @@ class ClassLinker { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const OatFile* FindOpenedOatFileFromDexLocation(const char* dex_location, const uint32_t* const dex_location_checksum) - LOCKS_EXCLUDED(dex_lock); + LOCKS_EXCLUDED(dex_lock_); const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) LOCKS_EXCLUDED(dex_lock_); const DexFile* FindDexFileInOatLocation(const char* dex_location, diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h index 7233d8ee7f..b07043f5d7 100644 --- a/runtime/compiler_callbacks.h +++ b/runtime/compiler_callbacks.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_COMPILER_CALLBACKS_H_ #define ART_RUNTIME_COMPILER_CALLBACKS_H_ +#include "base/mutex.h" #include "class_reference.h" -#include "locks.h" namespace art { diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 3b4e9c7fd6..7e2dfd2766 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -184,14 +184,14 @@ static Dbg::HpsgWhat gDdmHpsgWhat; static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER; static Dbg::HpsgWhat gDdmNhsgWhat; -static ObjectRegistry* gRegistry = NULL; +static ObjectRegistry* gRegistry = nullptr; // Recent allocation tracking. -static Mutex gAllocTrackerLock DEFAULT_MUTEX_ACQUIRED_AFTER("AllocTracker lock"); -AllocRecord* Dbg::recent_allocation_records_ PT_GUARDED_BY(gAllocTrackerLock) = NULL; // TODO: CircularBuffer<AllocRecord> -static size_t gAllocRecordMax GUARDED_BY(gAllocTrackerLock) = 0; -static size_t gAllocRecordHead GUARDED_BY(gAllocTrackerLock) = 0; -static size_t gAllocRecordCount GUARDED_BY(gAllocTrackerLock) = 0; +Mutex* Dbg::alloc_tracker_lock_ = nullptr; +AllocRecord* Dbg::recent_allocation_records_ = nullptr; // TODO: CircularBuffer<AllocRecord> +size_t Dbg::alloc_record_max_ = 0; +size_t Dbg::alloc_record_head_ = 0; +size_t Dbg::alloc_record_count_ = 0; // Deoptimization support. struct MethodInstrumentationRequest { @@ -468,9 +468,10 @@ void Dbg::StartJdwp() { return; } - CHECK(gRegistry == NULL); + CHECK(gRegistry == nullptr); gRegistry = new ObjectRegistry; + alloc_tracker_lock_ = new Mutex("AllocTracker lock"); // Init JDWP if the debugger is enabled. This may connect out to a // debugger, passively listen for a debugger, or block waiting for a // debugger. @@ -496,9 +497,11 @@ void Dbg::StopJdwp() { // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection. 
Disposed(); delete gJdwpState; - gJdwpState = NULL; + gJdwpState = nullptr; delete gRegistry; - gRegistry = NULL; + gRegistry = nullptr; + delete alloc_tracker_lock_; + alloc_tracker_lock_ = nullptr; } void Dbg::GcDidFinish() { @@ -3695,15 +3698,15 @@ static size_t GetAllocTrackerMax() { } void Dbg::SetAllocTrackingEnabled(bool enabled) { - MutexLock mu(Thread::Current(), gAllocTrackerLock); + MutexLock mu(Thread::Current(), *alloc_tracker_lock_); if (enabled) { if (recent_allocation_records_ == NULL) { - gAllocRecordMax = GetAllocTrackerMax(); - LOG(INFO) << "Enabling alloc tracker (" << gAllocRecordMax << " entries of " + alloc_record_max_ = GetAllocTrackerMax(); + LOG(INFO) << "Enabling alloc tracker (" << alloc_record_max_ << " entries of " << kMaxAllocRecordStackDepth << " frames, taking " - << PrettySize(sizeof(AllocRecord) * gAllocRecordMax) << ")"; - gAllocRecordHead = gAllocRecordCount = 0; - recent_allocation_records_ = new AllocRecord[gAllocRecordMax]; + << PrettySize(sizeof(AllocRecord) * alloc_record_max_) << ")"; + alloc_record_head_ = alloc_record_count_ = 0; + recent_allocation_records_ = new AllocRecord[alloc_record_max_]; CHECK(recent_allocation_records_ != NULL); } Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); @@ -3750,18 +3753,18 @@ void Dbg::RecordAllocation(mirror::Class* type, size_t byte_count) { Thread* self = Thread::Current(); CHECK(self != NULL); - MutexLock mu(self, gAllocTrackerLock); + MutexLock mu(self, *alloc_tracker_lock_); if (recent_allocation_records_ == NULL) { return; } // Advance and clip. - if (++gAllocRecordHead == gAllocRecordMax) { - gAllocRecordHead = 0; + if (++alloc_record_head_ == alloc_record_max_) { + alloc_record_head_ = 0; } // Fill in the basics. - AllocRecord* record = &recent_allocation_records_[gAllocRecordHead]; + AllocRecord* record = &recent_allocation_records_[alloc_record_head_]; record->type = type; record->byte_count = byte_count; record->thin_lock_id = self->GetThreadId(); @@ -3770,8 +3773,8 @@ void Dbg::RecordAllocation(mirror::Class* type, size_t byte_count) { AllocRecordStackVisitor visitor(self, record); visitor.WalkStack(); - if (gAllocRecordCount < gAllocRecordMax) { - ++gAllocRecordCount; + if (alloc_record_count_ < alloc_record_max_) { + ++alloc_record_count_; } } @@ -3783,13 +3786,14 @@ void Dbg::RecordAllocation(mirror::Class* type, size_t byte_count) { // // We need to handle underflow in our circular buffer, so we add // gAllocRecordMax and then mask it back down. -static inline int HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(gAllocTrackerLock) { - return (gAllocRecordHead+1 + gAllocRecordMax - gAllocRecordCount) & (gAllocRecordMax-1); +size_t Dbg::HeadIndex() { + return (Dbg::alloc_record_head_ + 1 + Dbg::alloc_record_max_ - Dbg::alloc_record_count_) & + (Dbg::alloc_record_max_ - 1); } void Dbg::DumpRecentAllocations() { ScopedObjectAccess soa(Thread::Current()); - MutexLock mu(soa.Self(), gAllocTrackerLock); + MutexLock mu(soa.Self(), *alloc_tracker_lock_); if (recent_allocation_records_ == NULL) { LOG(INFO) << "Not recording tracked allocations"; return; @@ -3798,9 +3802,9 @@ void Dbg::DumpRecentAllocations() { // "i" is the head of the list. We want to start at the end of the // list and move forward to the tail. 
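  // Worked example of the HeadIndex() arithmetic above (the "& (max - 1)" trick
  // is a modulo only when alloc_record_max_ is a power of two): with
  // alloc_record_max_ = 16, alloc_record_head_ = 3 and alloc_record_count_ = 6,
  // HeadIndex() = (3 + 1 + 16 - 6) & 15 = 14, so the walk below visits slots
  // 14, 15, 0, 1, 2, 3 -- oldest record first, the current head last -- stepping
  // with i = (i + 1) & (alloc_record_max_ - 1).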
size_t i = HeadIndex(); - size_t count = gAllocRecordCount; + size_t count = alloc_record_count_; - LOG(INFO) << "Tracked allocations, (head=" << gAllocRecordHead << " count=" << count << ")"; + LOG(INFO) << "Tracked allocations, (head=" << alloc_record_head_ << " count=" << count << ")"; while (count--) { AllocRecord* record = &recent_allocation_records_[i]; @@ -3820,22 +3824,20 @@ void Dbg::DumpRecentAllocations() { usleep(40000); } - i = (i + 1) & (gAllocRecordMax-1); + i = (i + 1) & (alloc_record_max_ - 1); } } void Dbg::UpdateObjectPointers(IsMarkedCallback* visitor, void* arg) { - { - MutexLock mu(Thread::Current(), gAllocTrackerLock); - if (recent_allocation_records_ != nullptr) { - size_t i = HeadIndex(); - size_t count = gAllocRecordCount; - while (count--) { - AllocRecord* record = &recent_allocation_records_[i]; - DCHECK(record != nullptr); - record->UpdateObjectPointers(visitor, arg); - i = (i + 1) & (gAllocRecordMax - 1); - } + if (recent_allocation_records_ != nullptr) { + MutexLock mu(Thread::Current(), *alloc_tracker_lock_); + size_t i = HeadIndex(); + size_t count = alloc_record_count_; + while (count--) { + AllocRecord* record = &recent_allocation_records_[i]; + DCHECK(record != nullptr); + record->UpdateObjectPointers(visitor, arg); + i = (i + 1) & (alloc_record_max_ - 1); } } if (gRegistry != nullptr) { @@ -3941,7 +3943,7 @@ jbyteArray Dbg::GetRecentAllocations() { Thread* self = Thread::Current(); std::vector<uint8_t> bytes; { - MutexLock mu(self, gAllocTrackerLock); + MutexLock mu(self, *alloc_tracker_lock_); // // Part 1: generate string tables. // @@ -3949,7 +3951,7 @@ jbyteArray Dbg::GetRecentAllocations() { StringTable method_names; StringTable filenames; - int count = gAllocRecordCount; + int count = alloc_record_count_; int idx = HeadIndex(); while (count--) { AllocRecord* record = &recent_allocation_records_[idx]; @@ -3967,10 +3969,10 @@ jbyteArray Dbg::GetRecentAllocations() { } } - idx = (idx + 1) & (gAllocRecordMax-1); + idx = (idx + 1) & (alloc_record_max_ - 1); } - LOG(INFO) << "allocation records: " << gAllocRecordCount; + LOG(INFO) << "allocation records: " << alloc_record_count_; // // Part 2: Generate the output and store it in the buffer. @@ -3991,14 +3993,14 @@ jbyteArray Dbg::GetRecentAllocations() { // (2b) number of class name strings // (2b) number of method name strings // (2b) number of source file name strings - JDWP::Append2BE(bytes, gAllocRecordCount); + JDWP::Append2BE(bytes, alloc_record_count_); size_t string_table_offset = bytes.size(); JDWP::Append4BE(bytes, 0); // We'll patch this later... 
JDWP::Append2BE(bytes, class_names.Size()); JDWP::Append2BE(bytes, method_names.Size()); JDWP::Append2BE(bytes, filenames.Size()); - count = gAllocRecordCount; + count = alloc_record_count_; idx = HeadIndex(); while (count--) { // For each entry: @@ -4032,7 +4034,7 @@ jbyteArray Dbg::GetRecentAllocations() { JDWP::Append2BE(bytes, record->stack[stack_frame].LineNumber()); } - idx = (idx + 1) & (gAllocRecordMax-1); + idx = (idx + 1) & (alloc_record_max_ - 1); } // (xb) class name strings diff --git a/runtime/debugger.h b/runtime/debugger.h index 5d269ee457..6c44bdea8f 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -391,7 +391,7 @@ class Dbg { LOCKS_EXCLUDED(Locks::deoptimization_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void DisableFullDeoptimization() - EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_) + LOCKS_EXCLUDED(Locks::deoptimization_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Manage deoptimization after updating JDWP events list. This must be done while all mutator @@ -448,8 +448,11 @@ class Dbg { static void RecordAllocation(mirror::Class* type, size_t byte_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void SetAllocTrackingEnabled(bool enabled); - static inline bool IsAllocTrackingEnabled() { return recent_allocation_records_ != NULL; } + static bool IsAllocTrackingEnabled() { + return recent_allocation_records_ != nullptr; + } static jbyteArray GetRecentAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(alloc_tracker_lock_); static void DumpRecentAllocations(); // Updates the stored direct object pointers (called from SweepSystemWeaks). @@ -488,7 +491,14 @@ class Dbg { static void PostThreadStartOrStop(Thread*, uint32_t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static AllocRecord* recent_allocation_records_; + static Mutex* alloc_tracker_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + + static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(alloc_tracker_lock_); + static size_t alloc_record_max_ GUARDED_BY(alloc_tracker_lock_); + static size_t alloc_record_head_ GUARDED_BY(alloc_tracker_lock_); + static size_t alloc_record_count_ GUARDED_BY(alloc_tracker_lock_); + + DISALLOW_COPY_AND_ASSIGN(Dbg); }; #define CHUNK_TYPE(_name) \ diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index a8fb6c14a1..8b48b3647f 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -29,9 +29,8 @@ #include "mirror/class-inl.h" #include "mirror/object-inl.h" #include "mirror/throwable.h" -#include "locks.h" #include "object_utils.h" -#include "sirt_ref.h" +#include "sirt_ref-inl.h" #include "thread.h" namespace art { @@ -642,8 +641,7 @@ static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer, } static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - UNLOCK_FUNCTION(monitor_lock_) { + NO_THREAD_SAFETY_ANALYSIS /* SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) */ { // Save any pending exception over monitor exit call. 
mirror::Throwable* saved_exception = NULL; ThrowLocation saved_throw_location; diff --git a/runtime/entrypoints/portable/portable_jni_entrypoints.cc b/runtime/entrypoints/portable/portable_jni_entrypoints.cc index de1e32ef17..17ad4d047c 100644 --- a/runtime/entrypoints/portable/portable_jni_entrypoints.cc +++ b/runtime/entrypoints/portable/portable_jni_entrypoints.cc @@ -23,7 +23,7 @@ namespace art { // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_. extern "C" uint32_t art_portable_jni_method_start(Thread* self) - UNLOCK_FUNCTION(GlobalSynchronizatio::mutator_lock_) { + UNLOCK_FUNCTION(Locks::mutator_lock_) { JNIEnvExt* env = self->GetJniEnv(); uint32_t saved_local_ref_cookie = env->local_ref_cookie; env->local_ref_cookie = env->locals.GetSegmentState(); @@ -32,7 +32,7 @@ extern "C" uint32_t art_portable_jni_method_start(Thread* self) } extern "C" uint32_t art_portable_jni_method_start_synchronized(jobject to_lock, Thread* self) - UNLOCK_FUNCTION(Locks::mutator_lock_) { + UNLOCK_FUNCTION(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS { self->DecodeJObject(to_lock)->MonitorEnter(self); return art_portable_jni_method_start(self); } diff --git a/runtime/entrypoints/portable/portable_lock_entrypoints.cc b/runtime/entrypoints/portable/portable_lock_entrypoints.cc index 44d3da9897..358ac233dc 100644 --- a/runtime/entrypoints/portable/portable_lock_entrypoints.cc +++ b/runtime/entrypoints/portable/portable_lock_entrypoints.cc @@ -20,8 +20,9 @@ namespace art { extern "C" void art_portable_lock_object_from_code(mirror::Object* obj, Thread* thread) - EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) { - DCHECK(obj != NULL); // Assumed to have been checked before entry. + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ { + DCHECK(obj != nullptr); // Assumed to have been checked before entry. obj->MonitorEnter(thread); // May block. DCHECK(thread->HoldsLock(obj)); // Only possible exception is NPE and is handled before entry. @@ -29,8 +30,9 @@ extern "C" void art_portable_lock_object_from_code(mirror::Object* obj, Thread* } extern "C" void art_portable_unlock_object_from_code(mirror::Object* obj, Thread* thread) - UNLOCK_FUNCTION(monitor_lock_) { - DCHECK(obj != NULL); // Assumed to have been checked before entry. + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ { + DCHECK(obj != nullptr); // Assumed to have been checked before entry. // MonitorExit may throw exception. obj->MonitorExit(thread); } diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h index 8f7004920d..3fd4adc0a7 100644 --- a/runtime/entrypoints/quick/callee_save_frame.h +++ b/runtime/entrypoints/quick/callee_save_frame.h @@ -26,8 +26,8 @@ class ArtMethod; } // namespace mirror // Place a special frame at the TOS that will save the callee saves for the given type. -static void FinishCalleeSaveFrameSetup(Thread* self, mirror::ArtMethod** sp, - Runtime::CalleeSaveType type) +static inline void FinishCalleeSaveFrameSetup(Thread* self, mirror::ArtMethod** sp, + Runtime::CalleeSaveType type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // Be aware the store below may well stomp on an incoming argument. 
Locks::mutator_lock_->AssertSharedHeld(self); diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 737fa3e735..116957d54c 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -24,6 +24,7 @@ #include "object_utils.h" #include "scoped_thread_state_change.h" #include "thread.h" +#include "verify_object-inl.h" namespace art { diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc index 5bc7f4cdec..817d053c9b 100644 --- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc @@ -21,7 +21,8 @@ namespace art { extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp) - EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) { + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ { FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); if (UNLIKELY(obj == NULL)) { ThrowLocation throw_location(self->GetCurrentLocationForThrow()); @@ -42,7 +43,8 @@ extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror:: } extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp) - UNLOCK_FUNCTION(monitor_lock_) { + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ { FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); if (UNLIKELY(obj == NULL)) { ThrowLocation throw_location(self->GetCurrentLocationForThrow()); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index bf8b8bab64..1bbaa6a7fd 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -820,84 +820,492 @@ extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, return code; } -// Visits arguments on the stack placing them into a region lower down the stack for the benefit -// of transitioning into native code. -class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { + + +/* + * This class uses a couple of observations to unite the different calling conventions through + * a few constants. + * + * 1) Number of registers used for passing is normally even, so counting down has no penalty for + * possible alignment. + * 2) Known 64b architectures store 8B units on the stack, both for integral and floating point + * types, so using uintptr_t is OK. Also means that we can use kRegistersNeededX to denote + * when we have to split things + * 3) The only soft-float, Arm, is 32b, so no widening needs to be taken into account for floats + * and we can use Int handling directly. + * 4) Only 64b architectures widen, and their stack is aligned 8B anyways, so no padding code + * necessary when widening. Also, widening of Ints will take place implicitly, and the + * extension should be compatible with Aarch64, which mandates copying the available bits + * into LSB and leaving the rest unspecified. + * 5) Aligning longs and doubles is necessary on arm only, and it's the same in registers and on + * the stack. + * 6) There is only little endian. + * + * + * Actual work is supposed to be done in a delegate of the template type. 
The interface is as + * follows: + * + * void PushGpr(uintptr_t): Add a value for the next GPR + * + * void PushFpr4(float): Add a value for the next FPR of size 32b. Is only called if we need + * padding, that is, think the architecture is 32b and aligns 64b. + * + * void PushFpr8(uint64_t): Push a double. We _will_ call this on 32b, it's the callee's job to + * split this if necessary. The current state will have aligned, if + * necessary. + * + * void PushStack(uintptr_t): Push a value to the stack. + * + * uintptr_t PushSirt(mirror::Object* ref): Add a reference to the Sirt. Is guaranteed != nullptr. + * Must return the jobject, that is, the reference to the + * entry in the Sirt. + * + */ +template <class T> class BuildGenericJniFrameStateMachine { + public: #if defined(__arm__) // TODO: These are all dummy values! - static constexpr bool kNativeSoftFloatAbi = false; // This is a hard float ABI. - static constexpr size_t kNumNativeGprArgs = 3; // 3 arguments passed in GPRs. + static constexpr bool kNativeSoftFloatAbi = true; + static constexpr size_t kNumNativeGprArgs = 4; // 4 arguments passed in GPRs, r0-r3 static constexpr size_t kNumNativeFprArgs = 0; // 0 arguments passed in FPRs. - static constexpr size_t kGprStackOffset = 4336; - static constexpr size_t kFprStackOffset = 4336 - 6*8; - static constexpr size_t kCallStackStackOffset = 4336 - 112; - static constexpr size_t kRegistersNeededForLong = 2; static constexpr size_t kRegistersNeededForDouble = 2; + static constexpr bool kMultiRegistersAligned = true; + static constexpr bool kMultiRegistersWidened = false; + static constexpr bool kAlignLongOnStack = true; + static constexpr bool kAlignDoubleOnStack = true; #elif defined(__mips__) // TODO: These are all dummy values! static constexpr bool kNativeSoftFloatAbi = true; // This is a hard float ABI. static constexpr size_t kNumNativeGprArgs = 0; // 6 arguments passed in GPRs. static constexpr size_t kNumNativeFprArgs = 0; // 8 arguments passed in FPRs. - // update these - static constexpr size_t kGprStackOffset = 4336; - static constexpr size_t kFprStackOffset = 4336 - 6*8; - static constexpr size_t kCallStackStackOffset = 4336 - 112; - static constexpr size_t kRegistersNeededForLong = 2; static constexpr size_t kRegistersNeededForDouble = 2; + static constexpr bool kMultiRegistersAligned = true; + static constexpr bool kMultiRegistersWidened = true; + static constexpr bool kAlignLongOnStack = false; + static constexpr bool kAlignDoubleOnStack = false; #elif defined(__i386__) // TODO: Check these! - static constexpr bool kNativeSoftFloatAbi = true; // This is a soft float ABI. + static constexpr bool kNativeSoftFloatAbi = false; // Not using int registers for fp static constexpr size_t kNumNativeGprArgs = 0; // 6 arguments passed in GPRs. static constexpr size_t kNumNativeFprArgs = 0; // 8 arguments passed in FPRs. - // update these - static constexpr size_t kGprStackOffset = 4336; - static constexpr size_t kFprStackOffset = 4336 - 6*8; - static constexpr size_t kCallStackStackOffset = 4336 - 112; - static constexpr size_t kRegistersNeededForLong = 2; static constexpr size_t kRegistersNeededForDouble = 2; + static constexpr bool kMultiRegistersAligned = false; // x86 not using regs, anyways + static constexpr bool kMultiRegistersWidened = false; + static constexpr bool kAlignLongOnStack = false; + static constexpr bool kAlignDoubleOnStack = false; #elif defined(__x86_64__) static constexpr bool kNativeSoftFloatAbi = false; // This is a hard float ABI. 
static constexpr size_t kNumNativeGprArgs = 6; // 6 arguments passed in GPRs. static constexpr size_t kNumNativeFprArgs = 8; // 8 arguments passed in FPRs. - static constexpr size_t kGprStackOffset = 4336; - static constexpr size_t kFprStackOffset = 4336 - 6*8; - static constexpr size_t kCallStackStackOffset = 4336 - 112; - static constexpr size_t kRegistersNeededForLong = 1; static constexpr size_t kRegistersNeededForDouble = 1; + static constexpr bool kMultiRegistersAligned = false; + static constexpr bool kMultiRegistersWidened = true; + static constexpr bool kAlignLongOnStack = false; + static constexpr bool kAlignDoubleOnStack = false; #else #error "Unsupported architecture" #endif + public: + explicit BuildGenericJniFrameStateMachine(T* delegate) : gpr_index_(kNumNativeGprArgs), + fpr_index_(kNumNativeFprArgs), + stack_entries_(0), + delegate_(delegate) { + // For register alignment, we want to assume that counters (gpr_index_, fpr_index_) are even iff + // the next register is even; counting down is just to make the compiler happy... + CHECK_EQ(kNumNativeGprArgs % 2, 0U); + CHECK_EQ(kNumNativeFprArgs % 2, 0U); + } + + virtual ~BuildGenericJniFrameStateMachine() {} + + bool HavePointerGpr() { + return gpr_index_ > 0; + } + + void AdvancePointer(void* val) { + if (HavePointerGpr()) { + gpr_index_--; + PushGpr(reinterpret_cast<uintptr_t>(val)); + } else { + stack_entries_++; // TODO: have a field for pointer length as multiple of 32b + PushStack(reinterpret_cast<uintptr_t>(val)); + gpr_index_ = 0; + } + } + + + bool HaveSirtGpr() { + return gpr_index_ > 0; + } + + void AdvanceSirt(mirror::Object* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uintptr_t sirtRef; + if (ptr != nullptr) { + sirtRef = PushSirt(ptr); + } else { + sirtRef = reinterpret_cast<uintptr_t>(nullptr); + } + if (HaveSirtGpr()) { + gpr_index_--; + PushGpr(sirtRef); + } else { + stack_entries_++; + PushStack(sirtRef); + gpr_index_ = 0; + } + } + + + bool HaveIntGpr() { + return gpr_index_ > 0; + } + + void AdvanceInt(uint32_t val) { + if (HaveIntGpr()) { + gpr_index_--; + PushGpr(val); + } else { + stack_entries_++; + PushStack(val); + gpr_index_ = 0; + } + } + + + bool HaveLongGpr() { + return gpr_index_ >= kRegistersNeededForLong + (LongGprNeedsPadding() ? 
1 : 0); + } + + bool LongGprNeedsPadding() { + return kRegistersNeededForLong > 1 && // only pad when using multiple registers + kAlignLongOnStack && // and when it needs alignment + (gpr_index_ & 1) == 1; // counter is odd, see constructor + } + + bool LongStackNeedsPadding() { + return kRegistersNeededForLong > 1 && // only pad when using multiple registers + kAlignLongOnStack && // and when it needs 8B alignment + (stack_entries_ & 1) == 1; // counter is odd + } + + void AdvanceLong(uint64_t val) { + if (HaveLongGpr()) { + if (LongGprNeedsPadding()) { + PushGpr(0); + gpr_index_--; + } + if (kRegistersNeededForLong == 1) { + PushGpr(static_cast<uintptr_t>(val)); + } else { + PushGpr(static_cast<uintptr_t>(val & 0xFFFFFFFF)); + PushGpr(static_cast<uintptr_t>((val >> 32) & 0xFFFFFFFF)); + } + gpr_index_ -= kRegistersNeededForLong; + } else { + if (LongStackNeedsPadding()) { + PushStack(0); + stack_entries_++; + } + if (kRegistersNeededForLong == 1) { + PushStack(static_cast<uintptr_t>(val)); + stack_entries_++; + } else { + PushStack(static_cast<uintptr_t>(val & 0xFFFFFFFF)); + PushStack(static_cast<uintptr_t>((val >> 32) & 0xFFFFFFFF)); + stack_entries_ += 2; + } + gpr_index_ = 0; + } + } + + + bool HaveFloatFpr() { + return fpr_index_ > 0; + } + + // TODO: please review this bit representation retrieving. + template <typename U, typename V> V convert(U in) { + CHECK_LE(sizeof(U), sizeof(V)); + union { U u; V v; } tmp; + tmp.u = in; + return tmp.v; + } + + void AdvanceFloat(float val) { + if (kNativeSoftFloatAbi) { + AdvanceInt(convert<float, uint32_t>(val)); + } else { + if (HaveFloatFpr()) { + fpr_index_--; + if (kRegistersNeededForDouble == 1) { + if (kMultiRegistersWidened) { + PushFpr8(convert<double, uint64_t>(val)); + } else { + // No widening, just use the bits. + PushFpr8(convert<float, uint64_t>(val)); + } + } else { + PushFpr4(val); + } + } else { + stack_entries_++; + if (kRegistersNeededForDouble == 1 && kMultiRegistersWidened) { + // Need to widen before storing: Note the "double" in the template instantiation. + PushStack(convert<double, uintptr_t>(val)); + } else { + PushStack(convert<float, uintptr_t>(val)); + } + fpr_index_ = 0; + } + } + } + + + bool HaveDoubleFpr() { + return fpr_index_ >= kRegistersNeededForDouble + (DoubleFprNeedsPadding() ? 
1 : 0); + } + + bool DoubleFprNeedsPadding() { + return kRegistersNeededForDouble > 1 && // only pad when using multiple registers + kAlignDoubleOnStack && // and when it needs alignment + (fpr_index_ & 1) == 1; // counter is odd, see constructor + } + + bool DoubleStackNeedsPadding() { + return kRegistersNeededForDouble > 1 && // only pad when using multiple registers + kAlignDoubleOnStack && // and when it needs 8B alignment + (stack_entries_ & 1) == 1; // counter is odd + } + + void AdvanceDouble(uint64_t val) { + if (kNativeSoftFloatAbi) { + AdvanceLong(val); + } else { + if (HaveDoubleFpr()) { + if (DoubleFprNeedsPadding()) { + PushFpr4(0); + fpr_index_--; + } + PushFpr8(val); + fpr_index_ -= kRegistersNeededForDouble; + } else { + if (DoubleStackNeedsPadding()) { + PushStack(0); + stack_entries_++; + } + if (kRegistersNeededForDouble == 1) { + PushStack(static_cast<uintptr_t>(val)); + stack_entries_++; + } else { + PushStack(static_cast<uintptr_t>(val & 0xFFFFFFFF)); + PushStack(static_cast<uintptr_t>((val >> 32) & 0xFFFFFFFF)); + stack_entries_ += 2; + } + fpr_index_ = 0; + } + } + } + + uint32_t getStackEntries() { + return stack_entries_; + } + + uint32_t getNumberOfUsedGprs() { + return kNumNativeGprArgs - gpr_index_; + } + + uint32_t getNumberOfUsedFprs() { + return kNumNativeFprArgs - fpr_index_; + } + + private: + void PushGpr(uintptr_t val) { + delegate_->PushGpr(val); + } + void PushFpr4(float val) { + delegate_->PushFpr4(val); + } + void PushFpr8(uint64_t val) { + delegate_->PushFpr8(val); + } + void PushStack(uintptr_t val) { + delegate_->PushStack(val); + } + uintptr_t PushSirt(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return delegate_->PushSirt(ref); + } + + uint32_t gpr_index_; // Number of free GPRs + uint32_t fpr_index_; // Number of free FPRs + uint32_t stack_entries_; // Stack entries are in multiples of 32b, as floats are usually not + // extended + T* delegate_; // What Push implementation gets called +}; +class ComputeGenericJniFrameSize FINAL { + public: + ComputeGenericJniFrameSize() : num_sirt_references_(0), num_stack_entries_(0) {} + + // (negative) offset from SP to top of Sirt. + uint32_t GetSirtOffset() { + return 8; + } + + uint32_t GetFirstSirtEntryOffset() { + return GetSirtOffset() + sizeof(StackReference<mirror::Object>); + } + + uint32_t GetNumSirtReferences() { + return num_sirt_references_; + } + + uint32_t GetStackSize() { + return num_stack_entries_ * sizeof(uintptr_t); + } + + void ComputeLayout(bool is_static, const char* shorty, uint32_t shorty_len, void* sp, + StackReference<mirror::Object>** start_sirt, StackIndirectReferenceTable** table, + uint32_t* sirt_entries, uintptr_t** start_stack, uintptr_t** start_gpr, + uint32_t** start_fpr, void** code_return, size_t* overall_size) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + ComputeAll(is_static, shorty, shorty_len); + + uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp); + *start_sirt = reinterpret_cast<StackReference<mirror::Object>*>(sp8-GetFirstSirtEntryOffset()); + + // Add padding entries if necessary for alignment. 
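    // Example with 32-bit pointers (sizeof(uintptr_t) == 4): three references give
    // size = 12 and rem = 12 % 8 = 4, so one dummy reference is added and the
    // reference area grows to 16 bytes, a multiple of 8. On 64-bit targets
    // sizeof(uintptr_t) == sizeof(uint64_t) and the guard below is never taken.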
+ if (sizeof(uintptr_t) < sizeof(uint64_t)) { + uint32_t size = sizeof(uintptr_t) * num_sirt_references_; + uint32_t rem = size % 8; + if (rem != 0) { + DCHECK_EQ(rem, 4U); + num_sirt_references_++; + } + } + *sirt_entries = num_sirt_references_; + size_t sirt_size = StackIndirectReferenceTable::SizeOf(num_sirt_references_); + sp8 -= GetSirtOffset() + sirt_size; + *table = reinterpret_cast<StackIndirectReferenceTable*>(sp8); + + sp8 -= GetStackSize(); + // Now align the call stack under the Sirt. This aligns by 16. + uintptr_t mask = ~0x0F; + sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask); + *start_stack = reinterpret_cast<uintptr_t*>(sp8); + + // put fprs and gprs below + // Assumption is OK right now, as we have soft-float arm + size_t fregs = BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize>::kNumNativeFprArgs; + sp8 -= fregs * sizeof(uintptr_t); + *start_fpr = reinterpret_cast<uint32_t*>(sp8); + size_t iregs = BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize>::kNumNativeGprArgs; + sp8 -= iregs * sizeof(uintptr_t); + *start_gpr = reinterpret_cast<uintptr_t*>(sp8); + + // reserve space for the code pointer + sp8 -= sizeof(void*); + *code_return = reinterpret_cast<void*>(sp8); + + *overall_size = reinterpret_cast<uint8_t*>(sp) - sp8; + } + + void ComputeSirtOffset() { } // nothing to do, static right now + + void ComputeAll(bool is_static, const char* shorty, uint32_t shorty_len) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize> sm(this); + + // JNIEnv + sm.AdvancePointer(nullptr); + + // Class object or this as first argument + sm.AdvanceSirt(reinterpret_cast<mirror::Object*>(0x12345678)); + + for (uint32_t i = 1; i < shorty_len; ++i) { + Primitive::Type cur_type_ = Primitive::GetType(shorty[i]); + switch (cur_type_) { + case Primitive::kPrimNot: + sm.AdvanceSirt(reinterpret_cast<mirror::Object*>(0x12345678)); + break; + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + sm.AdvanceInt(0); + break; + case Primitive::kPrimFloat: + sm.AdvanceFloat(0); + break; + case Primitive::kPrimDouble: + sm.AdvanceDouble(0); + break; + case Primitive::kPrimLong: + sm.AdvanceLong(0); + break; + default: + LOG(FATAL) << "Unexpected type: " << cur_type_ << " in " << shorty; + } + } + + num_stack_entries_ = sm.getStackEntries(); + } + + void PushGpr(uintptr_t /* val */) { + // not optimizing registers, yet + } + + void PushFpr4(float /* val */) { + // not optimizing registers, yet + } + + void PushFpr8(uint64_t /* val */) { + // not optimizing registers, yet + } + + void PushStack(uintptr_t /* val */) { + // counting is already done in the superclass + } + + uintptr_t PushSirt(mirror::Object* /* ptr */) { + num_sirt_references_++; + return reinterpret_cast<uintptr_t>(nullptr); + } + + private: + uint32_t num_sirt_references_; + uint32_t num_stack_entries_; +}; + +// Visits arguments on the stack placing them into a region lower down the stack for the benefit +// of transitioning into native code. 
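// BuildGenericJniFrameVisitor is the second delegate of
// BuildGenericJniFrameStateMachine (ComputeGenericJniFrameSize above is the
// first, counting-only delegate): the state machine decides whether each
// argument lands in a GPR, an FPR or on the stack and handles padding and
// widening, while this delegate's PushGpr/PushFpr4/PushFpr8/PushStack/PushSirt
// callbacks write the actual values into the areas laid out by ComputeLayout().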
+class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { public: BuildGenericJniFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty, uint32_t shorty_len, Thread* self) : - QuickArgumentVisitor(sp, is_static, shorty, shorty_len) { - // size of cookie plus padding - uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp); - top_of_sirt_ = sp8 - 8; - cur_sirt_entry_ = reinterpret_cast<StackReference<mirror::Object>*>(top_of_sirt_) - 1; + QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sm_(this) { + ComputeGenericJniFrameSize fsc; + fsc.ComputeLayout(is_static, shorty, shorty_len, sp, &cur_sirt_entry_, &sirt_, + &sirt_expected_refs_, &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_, + &code_return_, &alloca_used_size_); sirt_number_of_references_ = 0; - gpr_index_ = kNumNativeGprArgs; - fpr_index_ = kNumNativeFprArgs; - - cur_gpr_reg_ = reinterpret_cast<uintptr_t*>(sp8 - kGprStackOffset); - cur_fpr_reg_ = reinterpret_cast<uint32_t*>(sp8 - kFprStackOffset); - cur_stack_arg_ = reinterpret_cast<uintptr_t*>(sp8 - kCallStackStackOffset); + top_of_sirt_ = cur_sirt_entry_; // jni environment is always first argument - PushPointer(self->GetJniEnv()); + sm_.AdvancePointer(self->GetJniEnv()); if (is_static) { - PushArgumentInSirt((*sp)->GetDeclaringClass()); + sm_.AdvanceSirt((*sp)->GetDeclaringClass()); } } @@ -911,7 +1319,7 @@ class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { } else { long_arg = *reinterpret_cast<jlong*>(GetParamAddress()); } - PushLongArgument(long_arg); + sm_.AdvanceLong(long_arg); break; } case Primitive::kPrimDouble: { @@ -922,24 +1330,24 @@ class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { } else { double_arg = *reinterpret_cast<uint64_t*>(GetParamAddress()); } - PushDoubleArgument(double_arg); + sm_.AdvanceDouble(double_arg); break; } case Primitive::kPrimNot: { StackReference<mirror::Object>* stack_ref = reinterpret_cast<StackReference<mirror::Object>*>(GetParamAddress()); - PushArgumentInSirt(stack_ref->AsMirrorPtr()); + sm_.AdvanceSirt(stack_ref->AsMirrorPtr()); break; } case Primitive::kPrimFloat: - PushFloatArgument(*reinterpret_cast<int32_t*>(GetParamAddress())); + sm_.AdvanceFloat(*reinterpret_cast<float*>(GetParamAddress())); break; case Primitive::kPrimBoolean: // Fall-through. case Primitive::kPrimByte: // Fall-through. case Primitive::kPrimChar: // Fall-through. case Primitive::kPrimShort: // Fall-through. case Primitive::kPrimInt: // Fall-through. - PushIntArgument(*reinterpret_cast<jint*>(GetParamAddress())); + sm_.AdvanceInt(*reinterpret_cast<jint*>(GetParamAddress())); break; case Primitive::kPrimVoid: LOG(FATAL) << "UNREACHABLE"; @@ -948,149 +1356,87 @@ class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { } void FinalizeSirt(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (!IsAligned<8>(StackIndirectReferenceTable::SizeOf(sirt_number_of_references_))) { - sirt_number_of_references_++; + // Initialize padding entries. 
+ while (sirt_number_of_references_ < sirt_expected_refs_) { *cur_sirt_entry_ = StackReference<mirror::Object>(); cur_sirt_entry_--; + sirt_number_of_references_++; } - CHECK(IsAligned<8>(StackIndirectReferenceTable::SizeOf(sirt_number_of_references_))); - StackIndirectReferenceTable* sirt = reinterpret_cast<StackIndirectReferenceTable*>( - top_of_sirt_ - StackIndirectReferenceTable::SizeOf(sirt_number_of_references_)); + sirt_->SetNumberOfReferences(sirt_expected_refs_); - sirt->SetNumberOfReferences(sirt_number_of_references_); - self->PushSirt(sirt); + // Install Sirt. + self->PushSirt(sirt_); } jobject GetFirstSirtEntry() { - return reinterpret_cast<jobject>(reinterpret_cast<StackReference<mirror::Object>*>(top_of_sirt_) - 1); + return reinterpret_cast<jobject>(top_of_sirt_); } - private: - void PushArgumentInSirt(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - // Do something to push into the SIRT. - uintptr_t sirt_or_null; - if (obj != nullptr) { - sirt_number_of_references_++; - *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(obj); - sirt_or_null = reinterpret_cast<uintptr_t>(cur_sirt_entry_); - cur_sirt_entry_--; - } else { - sirt_or_null = reinterpret_cast<uintptr_t>(nullptr); - } - // Push the GPR or stack arg. - if (gpr_index_ > 0) { - *cur_gpr_reg_ = sirt_or_null; - cur_gpr_reg_++; - gpr_index_--; - } else { - *cur_stack_arg_ = sirt_or_null; - cur_stack_arg_++; - } + void PushGpr(uintptr_t val) { + *cur_gpr_reg_ = val; + cur_gpr_reg_++; } - void PushPointer(void* val) { - if (gpr_index_ > 0) { - *cur_gpr_reg_ = reinterpret_cast<uintptr_t>(val); - cur_gpr_reg_++; - gpr_index_--; - } else { - *cur_stack_arg_ = reinterpret_cast<uintptr_t>(val); - cur_stack_arg_++; - } + void PushFpr4(float val) { + *cur_fpr_reg_ = val; + cur_fpr_reg_++; } - void PushIntArgument(jint val) { - if (gpr_index_ > 0) { - *cur_gpr_reg_ = val; - cur_gpr_reg_++; - gpr_index_--; - } else { - *cur_stack_arg_ = val; - cur_stack_arg_++; - } + void PushFpr8(uint64_t val) { + uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_fpr_reg_); + *tmp = val; + cur_fpr_reg_ += 2; } - void PushLongArgument(jlong val) { - // This is an ugly hack for the following problem: - // Assume odd number of 32b registers. Then having exactly kRegsNeeded left needs to spill! - if (gpr_index_ >= kRegistersNeededForLong + (kNumNativeGprArgs % kRegistersNeededForLong)) { - if (kRegistersNeededForLong > 1 && ((kNumNativeGprArgs - gpr_index_) & 1) == 1) { - // Pad. - gpr_index_--; - cur_gpr_reg_++; - } - uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_gpr_reg_); - *tmp = val; - cur_gpr_reg_ += kRegistersNeededForLong; - gpr_index_ -= kRegistersNeededForLong; - } else { - uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_stack_arg_); - *tmp = val; - cur_stack_arg_ += kRegistersNeededForLong; - - gpr_index_ = 0; // can't use GPRs anymore - } + void PushStack(uintptr_t val) { + *cur_stack_arg_ = val; + cur_stack_arg_++; } - void PushFloatArgument(int32_t val) { - if (kNativeSoftFloatAbi) { - PushIntArgument(val); - } else { - if (fpr_index_ > 0) { - *cur_fpr_reg_ = val; - cur_fpr_reg_++; - if (kRegistersNeededForDouble == 1) { - // will pop 64 bits from the stack - // TODO: extend/clear bits??? - cur_fpr_reg_++; - } - fpr_index_--; - } else { - // TODO: Check ABI for floats. 
- *cur_stack_arg_ = val; - cur_stack_arg_++; - } - } + uintptr_t PushSirt(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref); + uintptr_t tmp = reinterpret_cast<uintptr_t>(cur_sirt_entry_); + cur_sirt_entry_--; + sirt_number_of_references_++; + return tmp; } - void PushDoubleArgument(uint64_t val) { - // See PushLongArgument for explanation - if (fpr_index_ >= kRegistersNeededForDouble + (kNumNativeFprArgs % kRegistersNeededForDouble)) { - if (kRegistersNeededForDouble > 1 && ((kNumNativeFprArgs - fpr_index_) & 1) == 1) { - // Pad. - fpr_index_--; - cur_fpr_reg_++; - } - uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_fpr_reg_); - *tmp = val; - // TODO: the whole thing doesn't make sense if we take uint32_t*... - cur_fpr_reg_ += 2; // kRegistersNeededForDouble; - fpr_index_ -= kRegistersNeededForDouble; - } else { - if (!IsAligned<8>(cur_stack_arg_)) { - cur_stack_arg_++; // Pad. - } - uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_stack_arg_); - *tmp = val; - cur_stack_arg_ += kRegistersNeededForDouble; + // Size of the part of the alloca that we actually need. + size_t GetAllocaUsedSize() { + return alloca_used_size_; + } - fpr_index_ = 0; // can't use FPRs anymore - } + void* GetCodeReturn() { + return code_return_; } + private: uint32_t sirt_number_of_references_; StackReference<mirror::Object>* cur_sirt_entry_; - uint32_t gpr_index_; // should be uint, but gives error because on some archs no regs + StackIndirectReferenceTable* sirt_; + uint32_t sirt_expected_refs_; uintptr_t* cur_gpr_reg_; - uint32_t fpr_index_; // ----- # ----- uint32_t* cur_fpr_reg_; uintptr_t* cur_stack_arg_; - uint8_t* top_of_sirt_; + StackReference<mirror::Object>* top_of_sirt_; + void* code_return_; + size_t alloca_used_size_; + + BuildGenericJniFrameStateMachine<BuildGenericJniFrameVisitor> sm_; DISALLOW_COPY_AND_ASSIGN(BuildGenericJniFrameVisitor); }; -extern "C" const void* artQuickGenericJniTrampoline(Thread* self, mirror::ArtMethod** sp) +/* + * Initializes an alloca region assumed to be directly below sp for a native call: + * Create a Sirt and call stack and fill a mini stack with values to be pushed to registers. + * The final element on the stack is a pointer to the native code. + * + * The return of this function denotes: + * 1) How many bytes of the alloca can be released, if the value is non-negative. + * 2) An error, if the value is negative. + */ +extern "C" ssize_t artQuickGenericJniTrampoline(Thread* self, mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp); mirror::ArtMethod* called = *sp; @@ -1098,6 +1444,7 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, mirror::ArtMet // run the visitor MethodHelper mh(called); + BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(), self); visitor.VisitArguments(); @@ -1110,10 +1457,10 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, mirror::ArtMet uint32_t cookie; if (called->IsSynchronized()) { cookie = JniMethodStartSynchronized(visitor.GetFirstSirtEntry(), self); - // TODO: error checking. if (self->IsExceptionPending()) { self->PopSirt(); - return nullptr; + // A negative value denotes an error. 
+ return -1; } } else { cookie = JniMethodStart(self); @@ -1127,7 +1474,12 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, mirror::ArtMet LOG(FATAL) << "Finding native code not implemented yet."; } - return nativeCode; + uintptr_t* code_pointer = reinterpret_cast<uintptr_t*>(visitor.GetCodeReturn()); + size_t window_size = visitor.GetAllocaUsedSize(); + *code_pointer = reinterpret_cast<uintptr_t>(nativeCode); + + // 5K reserved, window_size used. + return 5*1024 - window_size; } /* @@ -1141,27 +1493,30 @@ extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMet mirror::ArtMethod* called = *sp; uint32_t cookie = *(sp32-1); - // TODO: synchronized. MethodHelper mh(called); char return_shorty_char = mh.GetShorty()[0]; if (return_shorty_char == 'L') { // the only special ending call if (called->IsSynchronized()) { - BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), - mh.GetShortyLength(), self); - return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceSynchronized(result.l, cookie, - visitor.GetFirstSirtEntry(), + ComputeGenericJniFrameSize fsc; + fsc.ComputeSirtOffset(); + uint32_t offset = fsc.GetFirstSirtEntryOffset(); + jobject tmp = reinterpret_cast<jobject>(reinterpret_cast<uint8_t*>(sp)-offset); + + return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceSynchronized(result.l, cookie, tmp, self)); } else { return reinterpret_cast<uint64_t>(JniMethodEndWithReference(result.l, cookie, self)); } } else { if (called->IsSynchronized()) { - // run the visitor - BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), - mh.GetShortyLength(), self); - JniMethodEndSynchronized(cookie, visitor.GetFirstSirtEntry(), self); + ComputeGenericJniFrameSize fsc; + fsc.ComputeSirtOffset(); + uint32_t offset = fsc.GetFirstSirtEntryOffset(); + jobject tmp = reinterpret_cast<jobject>(reinterpret_cast<uint8_t*>(sp)-offset); + + JniMethodEndSynchronized(cookie, tmp, self); } else { JniMethodEnd(cookie, self); } diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index bb4d1d7326..8b7bfd35ef 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_ #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_H_ +#include "base/mutex.h" #include "globals.h" -#include "locks.h" #include "mem_map.h" #include "UniquePtr.h" diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h index dde1425abf..7cfeb63e25 100644 --- a/runtime/gc/accounting/heap_bitmap.h +++ b/runtime/gc/accounting/heap_bitmap.h @@ -19,7 +19,6 @@ #include "base/logging.h" #include "gc_allocator.h" -#include "locks.h" #include "object_callbacks.h" #include "space_bitmap.h" diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index 06127c11b9..887192183d 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -175,7 +175,6 @@ class CheckReferenceVisitor { } // Extra parameters are required since we use this same visitor signature for checking objects. - // TODO: Fixme when anotatalysis works with visitors. 
void operator()(Object* obj, Object* ref, const MemberOffset& /* offset */, bool /* is_static */) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) { diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 3c4b674fcd..5fd2bce8af 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -17,9 +17,9 @@ #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_ #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_ +#include "base/mutex.h" #include "gc_allocator.h" #include "globals.h" -#include "locks.h" #include "mem_map.h" #include "object_callbacks.h" #include "UniquePtr.h" @@ -248,8 +248,7 @@ class ObjectSet { contained_ = space_set.contained_; } - void Walk(ObjectCallback* callback, void* arg) - SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_); + void Walk(ObjectCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); template <typename Visitor> void Visit(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS { diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index 088f1d4581..8d401b8812 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -18,10 +18,10 @@ #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_ #include "base/histogram.h" +#include "base/mutex.h" #include "base/timing_logger.h" #include "gc/gc_cause.h" #include "gc_type.h" -#include "locks.h" #include <stdint.h> #include <vector> diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 4aff68a569..71424bd886 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -1347,9 +1347,6 @@ void MarkSweep::FinishPhase() { timings_.NewSplit("PostGcVerification"); heap->PostGcVerification(this); - timings_.NewSplit("RequestHeapTrim"); - heap->RequestHeapTrim(); - // Update the cumulative statistics total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects(); total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes(); diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index 5c0a233375..8d40c34f28 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -114,7 +114,7 @@ class MarkSweep : public GarbageCollector { EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - bool IsImmuneSpace(const space::ContinuousSpace* space) const; + bool IsImmuneSpace(const space::ContinuousSpace* space) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie @@ -152,6 +152,7 @@ class MarkSweep : public GarbageCollector { // Sweep only pointers within an array. WARNING: Trashes objects. void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); // Blackens an object. 
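The lock annotations added and tightened throughout this change (GUARDED_BY, SHARED_LOCKS_REQUIRED, EXCLUSIVE_LOCKS_REQUIRED, ACQUIRED_AFTER, NO_THREAD_SAFETY_ANALYSIS) are checked statically by Clang's -Wthread-safety analysis. Below is a minimal standalone sketch of what that buys, written against the raw Clang attributes these macros are assumed to expand to; the class and member names are illustrative only, not ART code.

// thread_safety_sketch.cc -- compile with: clang++ -std=c++11 -Wthread-safety -c thread_safety_sketch.cc
#include <pthread.h>

class __attribute__((lockable)) SketchMutex {
 public:
  SketchMutex() { pthread_mutex_init(&mu_, nullptr); }
  ~SketchMutex() { pthread_mutex_destroy(&mu_); }
  void Lock() __attribute__((exclusive_lock_function)) { pthread_mutex_lock(&mu_); }
  void Unlock() __attribute__((unlock_function)) { pthread_mutex_unlock(&mu_); }

 private:
  pthread_mutex_t mu_;
};

class AllocCounter {
 public:
  void Increment() {
    lock_.Lock();
    ++count_;  // OK: the analysis sees lock_ held here.
    lock_.Unlock();
  }

  // Callers must already hold lock_, in the spirit of Dbg::HeadIndex() above.
  size_t CountLocked() __attribute__((exclusive_locks_required(lock_))) {
    return count_;
  }

  size_t CountRacy() {
    return count_;  // Flagged by -Wthread-safety: reading count_ requires holding lock_.
  }

 private:
  SketchMutex lock_;
  size_t count_ __attribute__((guarded_by(lock_))) = 0;
};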
diff --git a/runtime/gc/collector/partial_mark_sweep.h b/runtime/gc/collector/partial_mark_sweep.h index 44ae9e9296..ac0d068194 100644 --- a/runtime/gc/collector/partial_mark_sweep.h +++ b/runtime/gc/collector/partial_mark_sweep.h @@ -17,7 +17,6 @@ #ifndef ART_RUNTIME_GC_COLLECTOR_PARTIAL_MARK_SWEEP_H_ #define ART_RUNTIME_GC_COLLECTOR_PARTIAL_MARK_SWEEP_H_ -#include "locks.h" #include "mark_sweep.h" namespace art { diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index a577f909de..2da360f3a0 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -678,13 +678,14 @@ void SemiSpace::DelayReferenceReferent(mirror::Class* klass, Object* obj) { heap_->DelayReferenceReferent(klass, obj, MarkedForwardingAddressCallback, this); } -// Visit all of the references of an object and update. -void SemiSpace::ScanObject(Object* obj) { - DCHECK(obj != NULL); - DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space"; - MarkSweep::VisitObjectReferences(obj, [this](Object* obj, Object* ref, const MemberOffset& offset, - bool /* is_static */) ALWAYS_INLINE_LAMBDA NO_THREAD_SAFETY_ANALYSIS { - mirror::Object* new_address = MarkObject(ref); +class SemiSpaceMarkObjectVisitor { + public: + explicit SemiSpaceMarkObjectVisitor(SemiSpace* semi_space) : semi_space_(semi_space) { + } + + void operator()(Object* obj, Object* ref, const MemberOffset& offset, bool /* is_static */) + const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) */ { + mirror::Object* new_address = semi_space_->MarkObject(ref); if (new_address != ref) { DCHECK(new_address != nullptr); // Don't need to mark the card since we updating the object address and not changing the @@ -694,7 +695,17 @@ void SemiSpace::ScanObject(Object* obj) { // disable check as we could run inside a transaction. obj->SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(offset, new_address, false); } - }, kMovingClasses); + } + private: + SemiSpace* const semi_space_; +}; + +// Visit all of the references of an object and update. +void SemiSpace::ScanObject(Object* obj) { + DCHECK(obj != NULL); + DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space"; + SemiSpaceMarkObjectVisitor visitor(this); + MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses); mirror::Class* klass = obj->GetClass<kVerifyNone>(); if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) { DelayReferenceReferent(klass, obj); diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h index 98f2b59243..934b1bd368 100644 --- a/runtime/gc/collector/sticky_mark_sweep.h +++ b/runtime/gc/collector/sticky_mark_sweep.h @@ -18,7 +18,6 @@ #define ART_RUNTIME_GC_COLLECTOR_STICKY_MARK_SWEEP_H_ #include "base/macros.h" -#include "locks.h" #include "partial_mark_sweep.h" namespace art { @@ -43,7 +42,9 @@ class StickyMarkSweep FINAL : public PartialMarkSweep { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); - void Sweep(bool swap_bitmaps) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + void Sweep(bool swap_bitmaps) OVERRIDE + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); // Don't need to do anything special here since we scan all the cards which may have references // to the newly allocated objects. 
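The heap-inl.h hunk below changes CheckConcurrentGC() to take mirror::Object** and to reload the pointer from a SirtRef after RequestConcurrentGC(), because that call is a safepoint and a moving collector may relocate the newly allocated object. Here is a self-contained toy sketch of that reload-after-safepoint pattern; every type and function in it is illustrative, not ART's API.

#include <cstddef>
#include <vector>

struct Object { int payload; };

// Toy root table standing in for a thread's SIRT: the collector rewrites the
// slots it knows about when it moves objects, so reading a slot after a
// safepoint yields the object's current address.
struct RootTable {
  std::vector<Object*> slots;
  size_t Register(Object* obj) { slots.push_back(obj); return slots.size() - 1; }
  Object* Read(size_t slot) const { return slots[slot]; }
  void Unregister(size_t slot) { slots.resize(slot); }
};

// Pretend a moving collector ran at a safepoint and relocated every rooted object.
void FakeMovingGc(RootTable* roots, Object* new_location) {
  for (Object*& slot : roots->slots) {
    slot = new_location;
  }
}

// Mirrors the shape of the CheckConcurrentGC() change: root the raw pointer,
// cross the safepoint, then reload the possibly updated address.
void SlowPathThatMayMove(RootTable* roots, Object** obj, Object* new_location) {
  size_t slot = roots->Register(*obj);  // analogous to SirtRef<mirror::Object> ref(self, *obj)
  FakeMovingGc(roots, new_location);    // analogous to RequestConcurrentGC(self)
  *obj = roots->Read(slot);             // analogous to *obj = ref.get()
  roots->Unregister(slot);
}

int main() {
  RootTable roots;
  Object before{1};
  Object after{1};
  Object* obj = &before;
  SlowPathThatMayMove(&roots, &obj, &after);
  return obj == &after ? 0 : 1;  // obj now points at the relocated copy
}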
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 89ded0b27f..b80e72e6cf 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -24,8 +24,8 @@ #include "gc/space/dlmalloc_space-inl.h" #include "gc/space/large_object_space.h" #include "gc/space/rosalloc_space-inl.h" -#include "object_utils.h" #include "runtime.h" +#include "sirt_ref-inl.h" #include "thread.h" #include "thread-inl.h" #include "verify_object-inl.h" @@ -37,7 +37,9 @@ template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor> inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator, const PreFenceVisitor& pre_fence_visitor) { - DebugCheckPreconditionsForAllocObject(klass, byte_count); + if (kIsDebugBuild) { + CheckPreconditionsForAllocObject(klass, byte_count); + } // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are // done in the runnable state where suspension is expected. DCHECK_EQ(self->GetState(), kRunnable); @@ -107,7 +109,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since // the allocator_type should be constant propagated. if (AllocatorMayHaveConcurrentGC(allocator) && concurrent_gc_) { - CheckConcurrentGC(self, new_num_bytes_allocated, obj); + CheckConcurrentGC(self, new_num_bytes_allocated, &obj); } VerifyObject(obj); self->VerifyStack(); @@ -226,13 +228,6 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator return ret; } -inline void Heap::DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) { - DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || - (c->IsVariableSize() || c->GetObjectSize() == byte_count) || - strlen(ClassHelper(c).GetDescriptor()) == 0); - DCHECK_GE(byte_count, sizeof(mirror::Object)); -} - inline Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr) : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr) { if (kMeasureAllocationTime) { @@ -280,11 +275,13 @@ inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t } inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, - mirror::Object* obj) { + mirror::Object** obj) { if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) { // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint. - SirtRef<mirror::Object> ref(self, obj); + SirtRef<mirror::Object> ref(self, *obj); RequestConcurrentGC(self); + // Restore obj in case it moved. 
+ *obj = ref.get(); } } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 87ee21bb86..87b4e60c82 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -90,6 +90,11 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max collector_type_(kCollectorTypeNone), post_zygote_collector_type_(post_zygote_collector_type), background_collector_type_(background_collector_type), + desired_collector_type_(collector_type_), + heap_trim_request_lock_(nullptr), + heap_trim_target_time_(0), + heap_transition_target_time_(0), + heap_trim_request_pending_(false), parallel_gc_threads_(parallel_gc_threads), conc_gc_threads_(conc_gc_threads), low_memory_mode_(low_memory_mode), @@ -127,7 +132,6 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max verify_mod_union_table_(false), verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc), verify_post_gc_rosalloc_(verify_post_gc_rosalloc), - last_trim_time_ms_(0), allocation_rate_(0), /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap @@ -160,16 +164,17 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max // If we aren't the zygote, switch to the default non zygote allocator. This may update the // entrypoints. if (!Runtime::Current()->IsZygote()) { - ChangeCollector(post_zygote_collector_type_); + desired_collector_type_ = post_zygote_collector_type_; large_object_threshold_ = kDefaultLargeObjectThreshold; } else { if (kMovingCollector) { // We are the zygote, use bump pointer allocation + semi space collector. - ChangeCollector(kCollectorTypeSS); + desired_collector_type_ = kCollectorTypeSS; } else { - ChangeCollector(post_zygote_collector_type_); + desired_collector_type_ = post_zygote_collector_type_; } } + ChangeCollector(desired_collector_type_); live_bitmap_.reset(new accounting::HeapBitmap(this)); mark_bitmap_.reset(new accounting::HeapBitmap(this)); @@ -274,7 +279,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max gc_complete_lock_ = new Mutex("GC complete lock"); gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable", *gc_complete_lock_)); - last_gc_time_ns_ = NanoTime(); + heap_trim_request_lock_ = new Mutex("Heap trim request lock"); last_gc_size_ = GetBytesAllocated(); if (ignore_max_footprint_) { @@ -318,6 +323,16 @@ void Heap::ChangeAllocator(AllocatorType allocator) { } } +void Heap::DisableCompaction() { + if (IsCompactingGC(post_zygote_collector_type_)) { + post_zygote_collector_type_ = kCollectorTypeCMS; + } + if (IsCompactingGC(background_collector_type_)) { + background_collector_type_ = post_zygote_collector_type_; + } + TransitionCollector(post_zygote_collector_type_); +} + std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) { if (!IsValidContinuousSpaceObjectAddress(klass)) { return StringPrintf("<non heap address klass %p>", klass); @@ -442,12 +457,12 @@ void Heap::UpdateProcessState(ProcessState process_state) { if (process_state_ != process_state) { process_state_ = process_state; if (process_state_ == kProcessStateJankPerceptible) { - TransitionCollector(post_zygote_collector_type_); + // Transition back to foreground right away to prevent jank. + RequestHeapTransition(post_zygote_collector_type_, 0); } else { - TransitionCollector(background_collector_type_); + // Don't delay for debug builds since we may want to stress test the GC. 
+ RequestHeapTransition(background_collector_type_, kIsDebugBuild ? 0 : kHeapTransitionWait); } - } else { - CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false); } } @@ -844,9 +859,40 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_obj self->ThrowOutOfMemoryError(oss.str().c_str()); } +void Heap::DoPendingTransitionOrTrim() { + Thread* self = Thread::Current(); + CollectorType desired_collector_type; + // Wait until we reach the desired transition time. + while (true) { + uint64_t wait_time; + { + MutexLock mu(self, *heap_trim_request_lock_); + desired_collector_type = desired_collector_type_; + uint64_t current_time = NanoTime(); + if (current_time >= heap_transition_target_time_) { + break; + } + wait_time = heap_transition_target_time_ - current_time; + } + ScopedThreadStateChange tsc(self, kSleeping); + usleep(wait_time / 1000); // Usleep takes microseconds. + } + // Transition the heap if the desired collector type is not the same as the current collector type. + TransitionCollector(desired_collector_type); + // Do a heap trim if it is needed. + Trim(); +} + void Heap::Trim() { Thread* self = Thread::Current(); { + MutexLock mu(self, *heap_trim_request_lock_); + if (!heap_trim_request_pending_ || NanoTime() < heap_trim_target_time_) { + return; + } + heap_trim_request_pending_ = false; + } + { // Need to do this before acquiring the locks since we don't want to get suspended while // holding any locks. ScopedThreadStateChange tsc(self, kWaitingForGcToComplete); @@ -1731,6 +1777,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus collector->Run(gc_cause, clear_soft_references); total_objects_freed_ever_ += collector->GetFreedObjects(); total_bytes_freed_ever_ += collector->GetFreedBytes(); + RequestHeapTrim(Heap::kHeapTrimWait); // Enqueue cleared references. EnqueueClearedReferences(); // Grow the heap so that we know when to perform the next GC. @@ -2493,7 +2540,20 @@ void Heap::ConcurrentGC(Thread* self) { } } -void Heap::RequestHeapTrim() { +void Heap::RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time) { + Thread* self = Thread::Current(); + { + MutexLock mu(self, *heap_trim_request_lock_); + if (desired_collector_type_ == desired_collector_type) { + return; + } + heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time); + desired_collector_type_ = desired_collector_type; + } + SignalHeapTrimDaemon(self); +} + +void Heap::RequestHeapTrim(uint64_t delta_time) { // GC completed and now we must decide whether to request a heap trim (advising pages back to the // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans // a space it will hold its lock and can become a cause of jank. @@ -2506,11 +2566,6 @@ void Heap::RequestHeapTrim() { // to utilization (which is probably inversely proportional to how much benefit we can expect). // We could try mincore(2) but that's only a measure of how many pages we haven't given away, // not how much use we're making of those pages. - uint64_t ms_time = MilliTime(); - // Don't bother trimming the alloc space if a heap trim occurred in the last two seconds. - if (ms_time - last_trim_time_ms_ < 2 * 1000) { - return; - } Thread* self = Thread::Current(); Runtime* runtime = Runtime::Current(); @@ -2521,19 +2576,27 @@ void Heap::RequestHeapTrim() { return; } - last_trim_time_ms_ = ms_time; - - // Trim only if we do not currently care about pause times.
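The DoPendingTransitionOrTrim() and RequestHeapTransition()/RequestHeapTrim() hunks above split trimming into a request side (record a target time under heap_trim_request_lock_ and signal the daemon) and a daemon side (sleep until the target time, then transition and/or trim). A minimal standalone sketch of that hand-off, using std::mutex and std::chrono in place of ART's Mutex and NanoTime(); the DeferredTrimmer class and its members are illustrative names, not ART's:

#include <chrono>
#include <mutex>
#include <thread>

// Illustrative stand-in for the heap trim bookkeeping above; not ART code.
class DeferredTrimmer {
 public:
  using Clock = std::chrono::steady_clock;

  // Requester side (compare RequestHeapTrim): push the target time out and mark a trim pending.
  void RequestTrim(std::chrono::milliseconds delay) {
    std::lock_guard<std::mutex> mu(request_lock_);
    Clock::time_point target = Clock::now() + delay;
    if (target > target_time_) {
      target_time_ = target;
    }
    pending_ = true;
    // The real code would now signal the heap-trim daemon (SignalHeapTrimDaemon).
  }

  // Daemon side (compare DoPendingTransitionOrTrim): sleep until the target time, then act.
  void DoPendingTrim() {
    while (true) {
      Clock::duration wait_time;
      {
        std::lock_guard<std::mutex> mu(request_lock_);
        if (!pending_) {
          return;  // Nothing requested.
        }
        Clock::time_point now = Clock::now();
        if (now >= target_time_) {
          pending_ = false;
          break;
        }
        wait_time = target_time_ - now;  // Re-read each pass; a new request may push it out.
      }
      std::this_thread::sleep_for(wait_time);  // Sleep outside the lock, like the usleep() above.
    }
    Trim();
  }

 private:
  void Trim() { /* release unused pages back to the OS */ }

  std::mutex request_lock_;
  Clock::time_point target_time_{};
  bool pending_ = false;
};

In the patch itself the daemon side is the Java heap-trim daemon: VMRuntime_trimHeap() now calls DoPendingTransitionOrTrim() instead of Trim() directly.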
+ // Request a heap trim only if we do not currently care about pause times. if (!CareAboutPauseTimes()) { - JNIEnv* env = self->GetJniEnv(); - DCHECK(WellKnownClasses::java_lang_Daemons != NULL); - DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != NULL); - env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, - WellKnownClasses::java_lang_Daemons_requestHeapTrim); - CHECK(!env->ExceptionCheck()); + { + MutexLock mu(self, *heap_trim_request_lock_); + heap_trim_target_time_ = std::max(heap_trim_target_time_, NanoTime() + delta_time); + heap_trim_request_pending_ = true; + } + // Notify the daemon thread which will actually do the heap trim. + SignalHeapTrimDaemon(self); } } +void Heap::SignalHeapTrimDaemon(Thread* self) { + JNIEnv* env = self->GetJniEnv(); + DCHECK(WellKnownClasses::java_lang_Daemons != nullptr); + DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != nullptr); + env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, + WellKnownClasses::java_lang_Daemons_requestHeapTrim); + CHECK(!env->ExceptionCheck()); +} + void Heap::RevokeThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { rosalloc_space_->RevokeThreadLocalBuffers(thread); @@ -2645,5 +2708,12 @@ void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) { mod_union_tables_.Put(mod_union_table->GetSpace(), mod_union_table); } +void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) { + CHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) || + (c->IsVariableSize() || c->GetObjectSize() == byte_count) || + strlen(ClassHelper(c).GetDescriptor()) == 0); + CHECK_GE(byte_count, sizeof(mirror::Object)); +} + } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 88adf811c5..3a8739a020 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -31,7 +31,6 @@ #include "globals.h" #include "gtest/gtest.h" #include "jni.h" -#include "locks.h" #include "object_callbacks.h" #include "offsets.h" #include "reference_queue.h" @@ -135,6 +134,10 @@ class Heap { // Used so that we don't overflow the allocation time atomic integer. static constexpr size_t kTimeAdjust = 1024; + // How long we wait after a GC to perform a heap trim (nanoseconds). + static constexpr uint64_t kHeapTrimWait = MsToNs(5000); + static constexpr uint64_t kHeapTransitionWait = MsToNs(5000); + // Create a heap with the requested sizes. The possible empty // image_file_names names specify Spaces to load based on // ImageWriter output. @@ -189,7 +192,7 @@ class Heap { void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); - void DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) + void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation); @@ -437,8 +440,12 @@ class Heap { void DumpForSigQuit(std::ostream& os); + + // Do a pending heap transition or trim. + void DoPendingTransitionOrTrim() LOCKS_EXCLUDED(heap_trim_request_lock_); + // Trim the managed and native heaps by releasing unused memory back to the OS. - void Trim(); + void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_); void RevokeThreadLocalBuffers(Thread* thread); void RevokeAllThreadLocalBuffers(); @@ -487,6 +494,9 @@ class Heap { // Assumes there is only one image space. space::ImageSpace* GetImageSpace() const; + // Permanently disable compaction.
+ void DisableCompaction(); + space::DlMallocSpace* GetDlMallocSpace() const { return dlmalloc_space_; } @@ -572,7 +582,8 @@ class Heap { bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, - mirror::Object* obj); + mirror::Object** obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // We don't force this to be inlined since it is a slow path. template <bool kInstrumented, typename PreFenceVisitor> @@ -636,7 +647,9 @@ class Heap { collector::GcType WaitForGcToCompleteLocked(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_); - void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); + void RequestHeapTransition(CollectorType desired_collector_type, uint64_t delta_time) + LOCKS_EXCLUDED(heap_trim_request_lock_); + void RequestHeapTrim(uint64_t delta_time) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); bool IsGCRequestPending() const; @@ -670,7 +683,7 @@ class Heap { void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_); static void VerificationCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(GlobalSychronization::heap_bitmap_lock_); + SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); // Swap the allocation stack with the live stack. void SwapStacks(Thread* self); @@ -678,6 +691,10 @@ class Heap { // Clear cards and update the mod union table. void ProcessCards(TimingLogger& timings); + // Signal the heap trim daemon that there is something to do, either a heap transition or heap + // trim. + void SignalHeapTrimDaemon(Thread* self); + // Push an object onto the allocation stack. void PushOnAllocationStack(Thread* self, mirror::Object* obj); @@ -730,6 +747,17 @@ class Heap { CollectorType post_zygote_collector_type_; // Which collector we will use when the app is notified of a transition to background. CollectorType background_collector_type_; + // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_. + CollectorType desired_collector_type_; + + // Lock which guards heap trim requests. + Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + // When we want to perform the next heap trim (nano seconds). + uint64_t heap_trim_target_time_ GUARDED_BY(heap_trim_request_lock_); + // When we want to perform the next heap transition (nano seconds). + uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_); + // If we have a heap trim request pending. + bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_); // How many GC threads we may use for paused parts of garbage collection. const size_t parallel_gc_threads_; @@ -851,9 +879,6 @@ class Heap { // Parallel GC data structures. UniquePtr<ThreadPool> thread_pool_; - // The last time a heap trim occurred. - uint64_t last_trim_time_ms_; - // The nanosecond time at which the last GC ended. 
uint64_t last_gc_time_ns_; diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h index e12a95f332..99314ba0ef 100644 --- a/runtime/gc/reference_queue.h +++ b/runtime/gc/reference_queue.h @@ -26,7 +26,6 @@ #include "globals.h" #include "gtest/gtest.h" #include "jni.h" -#include "locks.h" #include "object_callbacks.h" #include "offsets.h" #include "thread_pool.h" diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index 2c9d35fa55..031fccdfcd 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -146,9 +146,6 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { byte* AllocBlock(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(block_lock_); void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(block_lock_); - mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) - EXCLUSIVE_LOCKS_REQUIRED(lock_); - // The main block is an unbounded block where objects go when there are no other blocks. This // enables us to maintain tightly packed objects when you are not using thread local buffers for // allocation. The main block starts at the space Begin(). diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc index 61d1071124..dac043efbb 100644 --- a/runtime/gc/space/malloc_space.cc +++ b/runtime/gc/space/malloc_space.cc @@ -24,6 +24,7 @@ #include "mirror/class-inl.h" #include "mirror/object-inl.h" #include "runtime.h" +#include "sirt_ref-inl.h" #include "thread.h" #include "thread_list.h" #include "utils.h" diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h index 413fc1dcf9..ea0d290b1d 100644 --- a/runtime/gc/space/space_test.h +++ b/runtime/gc/space/space_test.h @@ -75,7 +75,7 @@ class SpaceTest : public CommonRuntimeTest { void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space); }; -static size_t test_rand(size_t* seed) { +static inline size_t test_rand(size_t* seed) { *seed = *seed * 1103515245 + 12345; return *seed; } diff --git a/runtime/globals.h b/runtime/globals.h index 83e302892a..5bc4b9146d 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -99,6 +99,9 @@ static constexpr bool kUseBrooksPointer = true; static constexpr bool kUseBrooksPointer = false; #endif +// If true, references within the heap are poisoned (negated). +static constexpr bool kPoisonHeapReferences = false; + } // namespace art #endif // ART_RUNTIME_GLOBALS_H_ diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 54c7b6e45d..ed3fb5fa0c 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -21,11 +21,38 @@ #include "scoped_thread_state_change.h" #include "thread.h" #include "utils.h" +#include "verify_object-inl.h" #include <cstdlib> namespace art { +template<typename T> +class MutatorLockedDumpable { + public: + explicit MutatorLockedDumpable(T& value) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : value_(value) { + } + + void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + value_.Dump(os); + } + + private: + T& value_; + + DISALLOW_COPY_AND_ASSIGN(MutatorLockedDumpable); +}; + +template<typename T> +std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs) +// TODO: should be SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) however annotalysis +// currently fails for this. 
+ NO_THREAD_SAFETY_ANALYSIS { + rhs.Dump(os); + return os; +} + static void AbortMaybe() { // If -Xcheck:jni is on, it'll give a more detailed error before aborting. if (!Runtime::Current()->GetJavaVM()->check_jni) { @@ -81,8 +108,7 @@ IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) { size_t topIndex = segment_state_.parts.topIndex; CHECK(obj != NULL); - // TODO: stronger sanity check on the object (such as in heap) - DCHECK_ALIGNED(reinterpret_cast<uintptr_t>(obj), 8); + VerifyObject(obj); DCHECK(table_ != NULL); DCHECK_LE(alloc_entries_, max_entries_); DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles); @@ -329,4 +355,13 @@ void IndirectReferenceTable::Dump(std::ostream& os) const { ReferenceTable::Dump(os, entries); } +mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const { + if (!GetChecked(iref)) { + return kInvalidIndirectRefObject; + } + mirror::Object* obj = table_[ExtractIndex(iref)];; + VerifyObject(obj); + return obj; +} + } // namespace art diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h index 9d2fa35103..a2de726de4 100644 --- a/runtime/indirect_reference_table.h +++ b/runtime/indirect_reference_table.h @@ -23,6 +23,7 @@ #include <string> #include "base/logging.h" +#include "base/mutex.h" #include "object_callbacks.h" #include "offsets.h" @@ -266,12 +267,7 @@ class IndirectReferenceTable { * * Returns kInvalidIndirectRefObject if iref is invalid. */ - mirror::Object* Get(IndirectRef iref) const { - if (!GetChecked(iref)) { - return kInvalidIndirectRefObject; - } - return table_[ExtractIndex(iref)]; - } + mirror::Object* Get(IndirectRef iref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // TODO: remove when we remove work_around_app_jni_bugs support. 
bool ContainsDirectPointer(mirror::Object* direct_pointer) const; diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index e04d7b22e7..e9356e06da 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -19,7 +19,7 @@ #include "atomic.h" #include "base/macros.h" -#include "locks.h" +#include "base/mutex.h" #include <stdint.h> #include <set> diff --git a/runtime/intern_table.h b/runtime/intern_table.h index fd921f3daf..7dd06c6f7a 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -17,12 +17,11 @@ #ifndef ART_RUNTIME_INTERN_TABLE_H_ #define ART_RUNTIME_INTERN_TABLE_H_ +#include <map> + #include "base/mutex.h" -#include "locks.h" #include "object_callbacks.h" -#include <map> - namespace art { enum VisitRootFlags : uint8_t; diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h index efe11fc140..0750eb5c49 100644 --- a/runtime/interpreter/interpreter.h +++ b/runtime/interpreter/interpreter.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_H_ #define ART_RUNTIME_INTERPRETER_INTERPRETER_H_ +#include "base/mutex.h" #include "dex_file.h" -#include "locks.h" namespace art { namespace mirror { diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index fdbdfeb3b1..fec0e31806 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -31,11 +31,13 @@ struct iovec; namespace art { - union JValue; + +union JValue; +class Thread; + namespace mirror { class ArtMethod; } // namespace mirror -class Thread; namespace JDWP { @@ -156,7 +158,7 @@ struct JdwpState { // ObjectId GetWaitForEventThread(); void SetWaitForEventThread(ObjectId threadId) LOCKS_EXCLUDED(event_thread_lock_, process_request_lock_); - void ClearWaitForEventThread() LOCKS_EXCLUDED(event_thread_lock); + void ClearWaitForEventThread() LOCKS_EXCLUDED(event_thread_lock_); /* * These notify the debug code that something interesting has happened. This @@ -334,6 +336,7 @@ struct JdwpState { // Linked list of events requested by the debugger (breakpoints, class prep, etc). Mutex event_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + JdwpEvent* event_list_ GUARDED_BY(event_list_lock_); size_t event_list_size_ GUARDED_BY(event_list_lock_); // Number of elements in event_list_. 
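The JdwpEvent* event_list_ field added above carries GUARDED_BY(event_list_lock_), one of the Clang thread-safety ("annotalysis") annotations this patch adds or tightens throughout the runtime. A small self-contained sketch of how these annotations are declared and what the analysis flags, written with raw __attribute__ spellings instead of ART's macros; the Mutex and EventCounter classes here are illustrative only:

#include <cstddef>

// Raw spellings of the annotations; ART wraps these in macros.
#define LOCKABLE                      __attribute__((lockable))
#define GUARDED_BY(x)                 __attribute__((guarded_by(x)))
#define EXCLUSIVE_LOCK_FUNCTION(...)  __attribute__((exclusive_lock_function(__VA_ARGS__)))
#define UNLOCK_FUNCTION(...)          __attribute__((unlock_function(__VA_ARGS__)))
#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))

class LOCKABLE Mutex {
 public:
  void Lock() EXCLUSIVE_LOCK_FUNCTION() {}
  void Unlock() UNLOCK_FUNCTION() {}
};

class EventCounter {
 public:
  void Add() {
    lock_.Lock();
    ++size_;        // OK: the analysis knows lock_ is held on this path.
    lock_.Unlock();
  }

  // Callers must already hold lock_; call sites are checked, not this body.
  size_t SizeLocked() const EXCLUSIVE_LOCKS_REQUIRED(lock_) { return size_; }

  size_t SizeRacy() const {
    return size_;   // clang -Wthread-safety: reading 'size_' requires holding mutex 'lock_'.
  }

 private:
  mutable Mutex lock_;
  size_t size_ GUARDED_BY(lock_) = 0;
};

With -Wthread-safety enabled, the compiler enforces the locking contract at every access instead of relying on comments.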
size_t full_deoptimization_requests_ GUARDED_BY(event_list_lock_); // Number of events requiring diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 1bcb8dd3bc..f8865ea9ef 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -2466,8 +2466,7 @@ class JNI { return JNI_OK; } - static jint MonitorEnter(JNIEnv* env, jobject java_object) - EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) { + static jint MonitorEnter(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS { CHECK_NON_NULL_ARGUMENT(MonitorEnter, java_object); ScopedObjectAccess soa(env); mirror::Object* o = soa.Decode<mirror::Object*>(java_object); @@ -2479,8 +2478,7 @@ class JNI { return JNI_OK; } - static jint MonitorExit(JNIEnv* env, jobject java_object) - UNLOCK_FUNCTION(monitor_lock_) { + static jint MonitorExit(JNIEnv* env, jobject java_object) NO_THREAD_SAFETY_ANALYSIS { CHECK_NON_NULL_ARGUMENT(MonitorExit, java_object); ScopedObjectAccess soa(env); mirror::Object* o = soa.Decode<mirror::Object*>(java_object); @@ -2539,11 +2537,13 @@ class JNI { IndirectRef ref = reinterpret_cast<IndirectRef>(java_object); IndirectRefKind kind = GetIndirectRefKind(ref); switch (kind) { - case kLocal: + case kLocal: { + ScopedObjectAccess soa(env); if (static_cast<JNIEnvExt*>(env)->locals.Get(ref) != kInvalidIndirectRefObject) { return JNILocalRefType; } return JNIInvalidRefType; + } case kGlobal: return JNIGlobalRefType; case kWeakGlobal: @@ -3194,7 +3194,11 @@ mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) { while (UNLIKELY(!allow_new_weak_globals_)) { weak_globals_add_condition_.WaitHoldingLocks(self); } - return const_cast<mirror::Object*>(weak_globals_.Get(ref)); + mirror::Object* obj = weak_globals_.Get(ref); + if (obj != kClearedJniWeakGlobal) { + VerifyObject(obj); + } + return obj; } void JavaVMExt::DumpReferenceTables(std::ostream& os) { diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h index 606d5d1311..7b49d33625 100644 --- a/runtime/jni_internal.h +++ b/runtime/jni_internal.h @@ -25,7 +25,6 @@ #include "object_callbacks.h" #include "reference_table.h" #include "runtime.h" -#include "sirt_ref.h" #include <iosfwd> #include <string> @@ -48,6 +47,7 @@ union JValue; class Libraries; class ParsedOptions; class ScopedObjectAccess; +template<class T> class SirtRef; class Thread; void JniAbortF(const char* jni_function_name, const char* fmt, ...) @@ -101,7 +101,8 @@ class JavaVMExt : public JavaVM { void DeleteWeakGlobalRef(Thread* self, jweak obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg); - mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref); + mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); Runtime* runtime; diff --git a/runtime/locks.cc b/runtime/locks.cc deleted file mode 100644 index 246e339ce9..0000000000 --- a/runtime/locks.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "locks.h" - -#include "base/mutex.h" - -namespace art { - -Mutex* Locks::abort_lock_ = NULL; -Mutex* Locks::breakpoint_lock_ = NULL; -Mutex* Locks::deoptimization_lock_ = NULL; -ReaderWriterMutex* Locks::classlinker_classes_lock_ = NULL; -ReaderWriterMutex* Locks::heap_bitmap_lock_ = NULL; -Mutex* Locks::logging_lock_ = NULL; -ReaderWriterMutex* Locks::mutator_lock_ = NULL; -Mutex* Locks::runtime_shutdown_lock_ = NULL; -Mutex* Locks::thread_list_lock_ = NULL; -Mutex* Locks::thread_suspend_count_lock_ = NULL; -Mutex* Locks::trace_lock_ = NULL; -Mutex* Locks::profiler_lock_ = NULL; -Mutex* Locks::unexpected_signal_lock_ = NULL; -Mutex* Locks::intern_table_lock_ = NULL; - -void Locks::Init() { - if (logging_lock_ != NULL) { - // Already initialized. - DCHECK(abort_lock_ != NULL); - DCHECK(breakpoint_lock_ != NULL); - DCHECK(deoptimization_lock_ != NULL); - DCHECK(classlinker_classes_lock_ != NULL); - DCHECK(heap_bitmap_lock_ != NULL); - DCHECK(logging_lock_ != NULL); - DCHECK(mutator_lock_ != NULL); - DCHECK(thread_list_lock_ != NULL); - DCHECK(thread_suspend_count_lock_ != NULL); - DCHECK(trace_lock_ != NULL); - DCHECK(profiler_lock_ != NULL); - DCHECK(unexpected_signal_lock_ != NULL); - DCHECK(intern_table_lock_ != NULL); - } else { - logging_lock_ = new Mutex("logging lock", kLoggingLock, true); - abort_lock_ = new Mutex("abort lock", kAbortLock, true); - - DCHECK(breakpoint_lock_ == NULL); - breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock); - DCHECK(deoptimization_lock_ == NULL); - deoptimization_lock_ = new Mutex("deoptimization lock", kDeoptimizationLock); - DCHECK(classlinker_classes_lock_ == NULL); - classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock", - kClassLinkerClassesLock); - DCHECK(heap_bitmap_lock_ == NULL); - heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock); - DCHECK(mutator_lock_ == NULL); - mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock); - DCHECK(runtime_shutdown_lock_ == NULL); - runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock); - DCHECK(thread_list_lock_ == NULL); - thread_list_lock_ = new Mutex("thread list lock", kThreadListLock); - DCHECK(thread_suspend_count_lock_ == NULL); - thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock); - DCHECK(trace_lock_ == NULL); - trace_lock_ = new Mutex("trace lock", kTraceLock); - DCHECK(profiler_lock_ == NULL); - profiler_lock_ = new Mutex("profiler lock", kProfilerLock); - DCHECK(unexpected_signal_lock_ == NULL); - unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true); - DCHECK(intern_table_lock_ == NULL); - intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock); - } -} - -} // namespace art diff --git a/runtime/locks.h b/runtime/locks.h deleted file mode 100644 index 4343ab40ed..0000000000 --- a/runtime/locks.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_RUNTIME_LOCKS_H_ -#define ART_RUNTIME_LOCKS_H_ - -#include <ostream> - -#include "base/macros.h" - -namespace art { - -class LOCKABLE Mutex; -class LOCKABLE ReaderWriterMutex; - -// LockLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or -// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle free -// partial ordering and thereby cause deadlock situations to fail checks. -// -// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163 -enum LockLevel { - kLoggingLock = 0, - kUnexpectedSignalLock, - kThreadSuspendCountLock, - kAbortLock, - kJdwpSocketLock, - kRosAllocGlobalLock, - kRosAllocBracketLock, - kRosAllocBulkFreeLock, - kAllocSpaceLock, - kDexFileMethodInlinerLock, - kDexFileToMethodInlinerMapLock, - kMarkSweepMarkStackLock, - kTransactionLogLock, - kInternTableLock, - kMonitorPoolLock, - kDefaultMutexLevel, - kMarkSweepLargeObjectLock, - kPinTableLock, - kLoadLibraryLock, - kJdwpObjectRegistryLock, - kClassLinkerClassesLock, - kBreakpointLock, - kMonitorLock, - kThreadListLock, - kBreakpointInvokeLock, - kDeoptimizationLock, - kTraceLock, - kProfilerLock, - kJdwpEventListLock, - kJdwpAttachLock, - kJdwpStartLock, - kRuntimeShutdownLock, - kHeapBitmapLock, - kMutatorLock, - kZygoteCreationLock, - - kLockLevelCount // Must come last. -}; -std::ostream& operator<<(std::ostream& os, const LockLevel& rhs); - -// Global mutexes corresponding to the levels above. -class Locks { - public: - static void Init(); - - // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block - // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds - // a share on the mutator_lock_. The garbage collector may also execute with shared access but - // at times requires exclusive access to the heap (not to be confused with the heap meta-data - // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks - // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_ - // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition - // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on - // the mutator lock doesn't necessarily allow the exclusive user (e.g the garbage collector) - // chance to acquire the lock. - // - // Thread suspension: - // Shared users | Exclusive user - // (holding mutator lock and in kRunnable state) | .. running .. - // .. running .. | Request thread suspension by: - // .. running .. | - acquiring thread_suspend_count_lock_ - // .. running .. | - incrementing Thread::suspend_count_ on - // .. running .. | all mutator threads - // .. running .. | - releasing thread_suspend_count_lock_ - // .. running .. | Block trying to acquire exclusive mutator lock - // Poll Thread::suspend_count_ and enter full | .. blocked .. - // suspend code. | .. blocked .. - // Change state to kSuspended | .. blocked .. 
- // x: Release share on mutator_lock_ | Carry out exclusive access - // Acquire thread_suspend_count_lock_ | .. exclusive .. - // while Thread::suspend_count_ > 0 | .. exclusive .. - // - wait on Thread::resume_cond_ | .. exclusive .. - // (releases thread_suspend_count_lock_) | .. exclusive .. - // .. waiting .. | Release mutator_lock_ - // .. waiting .. | Request thread resumption by: - // .. waiting .. | - acquiring thread_suspend_count_lock_ - // .. waiting .. | - decrementing Thread::suspend_count_ on - // .. waiting .. | all mutator threads - // .. waiting .. | - notifying on Thread::resume_cond_ - // - re-acquire thread_suspend_count_lock_ | - releasing thread_suspend_count_lock_ - // Release thread_suspend_count_lock_ | .. running .. - // Acquire share on mutator_lock_ | .. running .. - // - This could block but the thread still | .. running .. - // has a state of kSuspended and so this | .. running .. - // isn't an issue. | .. running .. - // Acquire thread_suspend_count_lock_ | .. running .. - // - we poll here as we're transitioning into | .. running .. - // kRunnable and an individual thread suspend | .. running .. - // request (e.g for debugging) won't try | .. running .. - // to acquire the mutator lock (which would | .. running .. - // block as we hold the mutator lock). This | .. running .. - // poll ensures that if the suspender thought | .. running .. - // we were suspended by incrementing our | .. running .. - // Thread::suspend_count_ and then reading | .. running .. - // our state we go back to waiting on | .. running .. - // Thread::resume_cond_. | .. running .. - // can_go_runnable = Thread::suspend_count_ == 0 | .. running .. - // Release thread_suspend_count_lock_ | .. running .. - // if can_go_runnable | .. running .. - // Change state to kRunnable | .. running .. - // else | .. running .. - // Goto x | .. running .. - // .. running .. | .. running .. - static ReaderWriterMutex* mutator_lock_; - - // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap. - static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_); - - // Guards shutdown of the runtime. - static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_); - - // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads - // attaching and detaching. - static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_); - - // Guards breakpoints. - static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_); - - // Guards deoptimization requests. - static Mutex* deoptimization_lock_ ACQUIRED_AFTER(breakpoint_lock_); - - // Guards trace requests. - static Mutex* trace_lock_ ACQUIRED_AFTER(deoptimization_lock_); - - // Guards profile objects. - static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_); - - // Guards lists of classes within the class linker. - static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_); - - // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code - // doesn't try to hold a higher level Mutex. - #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(classlinker_classes_lock_) - - // Guards intern table. - static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); - - // Have an exclusive aborting thread. - static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_); - - // Allow mutual exclusion when manipulating Thread::suspend_count_. - // TODO: Does the trade-off of a per-thread lock make sense? 
- static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_); - - // One unexpected signal at a time lock. - static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_); - - // Have an exclusive logging thread. - static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_); -}; - -} // namespace art - -#endif // ART_RUNTIME_LOCKS_H_ diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h index a18e171960..a61698d709 100644 --- a/runtime/mirror/art_method.h +++ b/runtime/mirror/art_method.h @@ -20,7 +20,6 @@ #include "class.h" #include "dex_file.h" #include "invoke_type.h" -#include "locks.h" #include "modifiers.h" #include "object.h" #include "object_callbacks.h" diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index f9a5ea2b91..76ab94c65e 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -17,7 +17,6 @@ #ifndef ART_RUNTIME_MIRROR_CLASS_H_ #define ART_RUNTIME_MIRROR_CLASS_H_ -#include "gc/heap.h" #include "invoke_type.h" #include "modifiers.h" #include "object.h" diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index ded4e0ae7a..4e2c624516 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -21,6 +21,7 @@ #include "base/logging.h" #include "base/macros.h" #include "cutils/atomic-inline.h" +#include "monitor.h" #include "object_reference.h" #include "offsets.h" #include "runtime.h" @@ -30,7 +31,6 @@ namespace art { class ImageWriter; class LockWord; -class Monitor; struct ObjectOffsets; class Thread; template <typename T> class SirtRef; @@ -64,7 +64,7 @@ class Throwable; static constexpr bool kCheckFieldAssignments = false; // C++ mirror of java.lang.Object -class MANAGED Object { +class MANAGED LOCKABLE Object { public: static MemberOffset ClassOffset() { return OFFSET_OF_OBJECT_MEMBER(Object, klass_); @@ -104,9 +104,9 @@ class MANAGED Object { uint32_t GetLockOwnerThreadId(); mirror::Object* MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCK_FUNCTION(monitor_lock_); + EXCLUSIVE_LOCK_FUNCTION(); bool MonitorExit(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - UNLOCK_FUNCTION(monitor_lock_); + UNLOCK_FUNCTION(); void Notify(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void NotifyAll(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void Wait(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h index b30890f99a..72f281df06 100644 --- a/runtime/mirror/object_reference.h +++ b/runtime/mirror/object_reference.h @@ -17,7 +17,8 @@ #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ -#include "locks.h" +#include "base/mutex.h" +#include "globals.h" namespace art { namespace mirror { @@ -74,7 +75,7 @@ class MANAGED ObjectReference { // References between objects within the managed heap. 
template<class MirrorType> -class MANAGED HeapReference : public ObjectReference<false, MirrorType> { +class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> { public: static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -82,7 +83,7 @@ class MANAGED HeapReference : public ObjectReference<false, MirrorType> { } private: HeapReference<MirrorType>(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : ObjectReference<false, MirrorType>(mirror_ptr) {} + : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {} }; } // namespace mirror diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc index 02a396acde..5217e5eda3 100644 --- a/runtime/mirror/stack_trace_element.cc +++ b/runtime/mirror/stack_trace_element.cc @@ -20,6 +20,7 @@ #include "class-inl.h" #include "gc/accounting/card_table-inl.h" #include "object-inl.h" +#include "sirt_ref-inl.h" #include "string.h" namespace art { diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h index 779ec4b780..9e023c7dba 100644 --- a/runtime/mirror/stack_trace_element.h +++ b/runtime/mirror/stack_trace_element.h @@ -18,10 +18,10 @@ #define ART_RUNTIME_MIRROR_STACK_TRACE_ELEMENT_H_ #include "object.h" -#include "sirt_ref.h" namespace art { +template<class T> class SirtRef; struct StackTraceElementOffsets; namespace mirror { diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 64794feb48..332aef0b7e 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -650,9 +650,22 @@ void Monitor::InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, Lock } } +// Fool annotalysis into thinking that the lock on obj is acquired. +static mirror::Object* FakeLock(mirror::Object* obj) + EXCLUSIVE_LOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS { + return obj; +} + +// Fool annotalysis into thinking that the lock on obj is released. +static mirror::Object* FakeUnlock(mirror::Object* obj) + UNLOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS { + return obj; +} + mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { DCHECK(self != NULL); DCHECK(obj != NULL); + obj = FakeLock(obj); uint32_t thread_id = self->GetThreadId(); size_t contention_count = 0; SirtRef<mirror::Object> sirt_obj(self, obj); @@ -698,24 +711,22 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { mon->Lock(self); return sirt_obj.get(); // Success! } - case LockWord::kHashCode: { + case LockWord::kHashCode: // Inflate with the existing hashcode. Inflate(self, nullptr, sirt_obj.get(), lock_word.GetHashCode()); - break; - } + continue; // Start from the beginning.
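The FakeLock()/FakeUnlock() helpers above exist only to keep annotalysis happy: MonitorEnter()/MonitorExit() are now annotated as acquiring and releasing a capability on obj itself (see the monitor.h hunk below), but the real work goes through lock words and inflated monitors that the analysis cannot follow. A stripped-down sketch of the idiom, with a placeholder Object class standing in for mirror::Object:

#define LOCKABLE                      __attribute__((lockable))
#define EXCLUSIVE_LOCK_FUNCTION(...)  __attribute__((exclusive_lock_function(__VA_ARGS__)))
#define UNLOCK_FUNCTION(...)          __attribute__((unlock_function(__VA_ARGS__)))
#define NO_THREAD_SAFETY_ANALYSIS     __attribute__((no_thread_safety_analysis))

class LOCKABLE Object {};  // Stand-in for mirror::Object, which object.h now marks LOCKABLE.

// No-op helpers whose only job is to tell the analysis that the capability on obj changed hands.
static Object* FakeLock(Object* obj) EXCLUSIVE_LOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS {
  return obj;
}
static Object* FakeUnlock(Object* obj) UNLOCK_FUNCTION(obj) NO_THREAD_SAFETY_ANALYSIS {
  return obj;
}

// Annotated as acquiring the lock on obj, matching the monitor.h declarations in this patch.
Object* MonitorEnter(Object* obj) EXCLUSIVE_LOCK_FUNCTION(obj) {
  obj = FakeLock(obj);
  // ... thin-lock / monitor-inflation logic that annotalysis cannot follow ...
  return obj;
}

bool MonitorExit(Object* obj) UNLOCK_FUNCTION(obj) {
  obj = FakeUnlock(obj);
  // ... real unlock logic ...
  return true;
}

The NO_THREAD_SAFETY_ANALYSIS on the helpers stops the analysis from complaining that they never actually take a lock.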
default: { LOG(FATAL) << "Invalid monitor state " << lock_word.GetState(); return sirt_obj.get(); } } } - return sirt_obj.get(); } bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) { DCHECK(self != NULL); DCHECK(obj != NULL); - + obj = FakeUnlock(obj); LockWord lock_word = obj->GetLockWord(); SirtRef<mirror::Object> sirt_obj(self, obj); switch (lock_word.GetState()) { diff --git a/runtime/monitor.h b/runtime/monitor.h index d0a3a2ed2b..55504b5943 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -27,16 +27,18 @@ #include "atomic.h" #include "base/mutex.h" #include "object_callbacks.h" -#include "sirt_ref.h" #include "thread_state.h" namespace art { +template<class T> class SirtRef; + namespace mirror { class ArtMethod; class Object; } // namespace mirror class LockWord; +template<class T> class SirtRef; class Thread; class StackVisitor; @@ -58,11 +60,11 @@ class Monitor { NO_THREAD_SAFETY_ANALYSIS; // TODO: Reading lock owner without holding lock is racy. static mirror::Object* MonitorEnter(Thread* thread, mirror::Object* obj) - EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) + EXCLUSIVE_LOCK_FUNCTION(obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static bool MonitorExit(Thread* thread, mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - UNLOCK_FUNCTION(monitor_lock_); + UNLOCK_FUNCTION(obj); static void Notify(Thread* self, mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -178,6 +180,7 @@ class Monitor { static uint32_t lock_profiling_threshold_; Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_); // Number of people waiting on the condition. diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc index 19e569d204..eb7525a6ad 100644 --- a/runtime/monitor_pool.cc +++ b/runtime/monitor_pool.cc @@ -18,6 +18,7 @@ #include "base/logging.h" #include "base/mutex-inl.h" +#include "thread-inl.h" #include "monitor.h" namespace art { diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h index 32f3f4ebe3..82d0feef4d 100644 --- a/runtime/monitor_pool.h +++ b/runtime/monitor_pool.h @@ -17,11 +17,14 @@ #ifndef ART_RUNTIME_MONITOR_POOL_H_ #define ART_RUNTIME_MONITOR_POOL_H_ -#include "monitor.h" +#ifdef __LP64__ +#include <bitset> +#include <stdint.h> +#include "monitor.h" +#include "runtime.h" #include "safe_map.h" - -#include <stdint.h> +#endif namespace art { diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index f48e8ad07a..4aa1d1011d 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -177,6 +177,8 @@ static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targe << targetSdkVersion << "..."; vm->work_around_app_jni_bugs = true; + LOG(WARNING) << "Permanently disabling heap compaction due to jni workarounds"; + Runtime::Current()->GetHeap()->DisableCompaction(); } } } @@ -204,12 +206,11 @@ static void VMRuntime_updateProcessState(JNIEnv* env, jobject, jint process_stat } static void VMRuntime_trimHeap(JNIEnv*, jobject) { - Runtime::Current()->GetHeap()->Trim(); + Runtime::Current()->GetHeap()->DoPendingTransitionOrTrim(); } static void VMRuntime_concurrentGC(JNIEnv* env, jobject) { - Thread* self = ThreadForEnv(env); - Runtime::Current()->GetHeap()->ConcurrentGC(self); + Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env)); } typedef std::map<std::string, mirror::String*> StringTable; diff --git a/runtime/native/java_lang_Runtime.cc
b/runtime/native/java_lang_Runtime.cc index 0629f4d71c..f6149fff44 100644 --- a/runtime/native/java_lang_Runtime.cc +++ b/runtime/native/java_lang_Runtime.cc @@ -24,6 +24,7 @@ #include "runtime.h" #include "scoped_thread_state_change.h" #include "ScopedUtfChars.h" +#include "sirt_ref-inl.h" namespace art { diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h index b5ee748425..645d78cce8 100644 --- a/runtime/native/scoped_fast_native_object_access.h +++ b/runtime/native/scoped_fast_native_object_access.h @@ -80,8 +80,6 @@ class ScopedFastNativeObjectAccess { return NULL; } - VerifyObject(obj); - DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000); IndirectReferenceTable& locals = Env()->locals; diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h index 794878a08e..374a80ea28 100644 --- a/runtime/nth_caller_visitor.h +++ b/runtime/nth_caller_visitor.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_NTH_CALLER_VISITOR_H_ #define ART_RUNTIME_NTH_CALLER_VISITOR_H_ +#include "base/mutex.h" #include "mirror/art_method.h" -#include "locks.h" #include "stack.h" namespace art { diff --git a/runtime/object_utils.h b/runtime/object_utils.h index 4eac29164e..dd2bd4fafe 100644 --- a/runtime/object_utils.h +++ b/runtime/object_utils.h @@ -28,7 +28,7 @@ #include "mirror/string.h" #include "runtime.h" -#include "sirt_ref.h" +#include "sirt_ref-inl.h" #include <string> diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 04f1a05a3d..37db4624be 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -147,7 +147,13 @@ bool ParsedOptions::Parse(const Runtime::Options& options, bool ignore_unrecogni compiler_callbacks_ = nullptr; is_zygote_ = false; - interpreter_only_ = false; + if (kPoisonHeapReferences) { + // kPoisonHeapReferences currently works only with the interpreter only. + // TODO: make it work with the compiler. 
+ interpreter_only_ = true; + } else { + interpreter_only_ = false; + } is_explicit_gc_disabled_ = false; long_pause_log_threshold_ = gc::Heap::kDefaultLongPauseLogThreshold; diff --git a/runtime/profiler.h b/runtime/profiler.h index e3af47cf50..6ea6c84f08 100644 --- a/runtime/profiler.h +++ b/runtime/profiler.h @@ -22,15 +22,14 @@ #include <string> #include <vector> +#include "barrier.h" #include "base/macros.h" +#include "base/mutex.h" #include "globals.h" #include "instrumentation.h" #include "os.h" #include "safe_map.h" -#include "base/mutex.h" -#include "locks.h" #include "UniquePtr.h" -#include "barrier.h" namespace art { diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc index f43a15b83d..a3119bbd12 100644 --- a/runtime/reference_table.cc +++ b/runtime/reference_table.cc @@ -40,6 +40,7 @@ ReferenceTable::~ReferenceTable() { void ReferenceTable::Add(mirror::Object* obj) { DCHECK(obj != NULL); + VerifyObject(obj); if (entries_.size() >= max_size_) { LOG(FATAL) << "ReferenceTable '" << name_ << "' " << "overflowed (" << max_size_ << " entries)"; diff --git a/runtime/reference_table.h b/runtime/reference_table.h index c9f5bc5c57..45309c9d99 100644 --- a/runtime/reference_table.h +++ b/runtime/reference_table.h @@ -22,8 +22,8 @@ #include <string> #include <vector> +#include "base/mutex.h" #include "object_callbacks.h" -#include "locks.h" namespace art { namespace mirror { diff --git a/runtime/runtime.cc b/runtime/runtime.cc index de06fb8ee0..fdbf2456a4 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -94,7 +94,7 @@ Runtime::Runtime() default_imt_(nullptr), fault_message_lock_("Fault message lock"), fault_message_(""), - method_verifiers_lock_("Method verifiers lock"), + method_verifier_lock_("Method verifiers lock"), threads_being_born_(0), shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)), shutting_down_(false), @@ -851,7 +851,7 @@ void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) { } } { - MutexLock mu(Thread::Current(), method_verifiers_lock_); + MutexLock mu(Thread::Current(), method_verifier_lock_); for (verifier::MethodVerifier* verifier : method_verifiers_) { verifier->VisitRoots(callback, arg); } @@ -1043,13 +1043,13 @@ void Runtime::SetCompileTimeClassPath(jobject class_loader, void Runtime::AddMethodVerifier(verifier::MethodVerifier* verifier) { DCHECK(verifier != nullptr); - MutexLock mu(Thread::Current(), method_verifiers_lock_); + MutexLock mu(Thread::Current(), method_verifier_lock_); method_verifiers_.insert(verifier); } void Runtime::RemoveMethodVerifier(verifier::MethodVerifier* verifier) { DCHECK(verifier != nullptr); - MutexLock mu(Thread::Current(), method_verifiers_lock_); + MutexLock mu(Thread::Current(), method_verifier_lock_); auto it = method_verifiers_.find(verifier); CHECK(it != method_verifiers_.end()); method_verifiers_.erase(it); diff --git a/runtime/runtime.h b/runtime/runtime.h index 87307ae223..65d296a3dc 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -33,7 +33,6 @@ #include "instruction_set.h" #include "instrumentation.h" #include "jobject_comparator.h" -#include "locks.h" #include "object_callbacks.h" #include "runtime_stats.h" #include "safe_map.h" @@ -471,7 +470,7 @@ class Runtime { std::string fault_message_ GUARDED_BY(fault_message_lock_); // Method verifier set, used so that we can update their GC roots. 
- Mutex method_verifiers_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + Mutex method_verifier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::set<verifier::MethodVerifier*> method_verifiers_; // A non-zero value indicates that a thread has been created but not yet initialized. Guarded by diff --git a/runtime/safe_map.h b/runtime/safe_map.h index 89da927cc2..393bf92ba2 100644 --- a/runtime/safe_map.h +++ b/runtime/safe_map.h @@ -33,10 +33,17 @@ class SafeMap { typedef SafeMap<K, V, Comparator, Allocator> Self; public: - typedef typename ::std::map<K, V, Comparator>::iterator iterator; - typedef typename ::std::map<K, V, Comparator>::const_iterator const_iterator; - typedef typename ::std::map<K, V, Comparator>::size_type size_type; - typedef typename ::std::map<K, V, Comparator>::value_type value_type; + typedef typename ::std::map<K, V, Comparator, Allocator>::key_compare key_compare; + typedef typename ::std::map<K, V, Comparator, Allocator>::allocator_type allocator_type; + typedef typename ::std::map<K, V, Comparator, Allocator>::iterator iterator; + typedef typename ::std::map<K, V, Comparator, Allocator>::const_iterator const_iterator; + typedef typename ::std::map<K, V, Comparator, Allocator>::size_type size_type; + typedef typename ::std::map<K, V, Comparator, Allocator>::value_type value_type; + + SafeMap() = default; + explicit SafeMap(const key_compare& cmp, const allocator_type& allocator = allocator_type()) + : map_(cmp, allocator) { + } Self& operator=(const Self& rhs) { map_ = rhs.map_; diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h index f0f5ed263d..d9e7986efe 100644 --- a/runtime/scoped_thread_state_change.h +++ b/runtime/scoped_thread_state_change.h @@ -169,8 +169,6 @@ class ScopedObjectAccessUnchecked : public ScopedThreadStateChange { return NULL; } - VerifyObject(obj); - DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000); IndirectReferenceTable& locals = Env()->locals; diff --git a/runtime/sirt_ref-inl.h b/runtime/sirt_ref-inl.h new file mode 100644 index 0000000000..7f2d847fa8 --- /dev/null +++ b/runtime/sirt_ref-inl.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_SIRT_REF_INL_H_ +#define ART_RUNTIME_SIRT_REF_INL_H_ + +#include "sirt_ref.h" + +#include "verify_object-inl.h" + +namespace art { + +template<class T> inline SirtRef<T>::SirtRef(Thread* self, T* object) : self_(self), sirt_(object) { + VerifyObject(object); + self_->PushSirt(&sirt_); +} + +template<class T> inline SirtRef<T>::~SirtRef() { + StackIndirectReferenceTable* top_sirt = self_->PopSirt(); + DCHECK_EQ(top_sirt, &sirt_); +} + +template<class T> inline T* SirtRef<T>::reset(T* object) { + VerifyObject(object); + T* old_ref = get(); + sirt_.SetReference(0, object); + return old_ref; +} + +} // namespace art + +#endif // ART_RUNTIME_SIRT_REF_INL_H_ diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h index b22e816e2d..2226e17f56 100644 --- a/runtime/sirt_ref.h +++ b/runtime/sirt_ref.h @@ -20,6 +20,7 @@ #include "base/casts.h" #include "base/logging.h" #include "base/macros.h" +#include "stack_indirect_reference_table.h" #include "thread.h" namespace art { @@ -27,13 +28,8 @@ namespace art { template<class T> class SirtRef { public: - SirtRef(Thread* self, T* object) : self_(self), sirt_(object) { - self_->PushSirt(&sirt_); - } - ~SirtRef() { - StackIndirectReferenceTable* top_sirt = self_->PopSirt(); - DCHECK_EQ(top_sirt, &sirt_); - } + SirtRef(Thread* self, T* object); + ~SirtRef(); T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return *get(); @@ -46,11 +42,7 @@ class SirtRef { } // Returns the old reference. - T* reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - T* old_ref = get(); - sirt_.SetReference(0, object); - return old_ref; - } + T* reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: Thread* const self_; diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index f7e88cc75b..66077f904e 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -146,9 +146,10 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { if (UNLIKELY(!done)) { // Failed to transition to Runnable. Release shared mutator_lock_ access and try again. Locks::mutator_lock_->SharedUnlock(this); + } else { + return static_cast<ThreadState>(old_state); } - } while (UNLIKELY(!done)); - return static_cast<ThreadState>(old_state); + } while (true); } inline void Thread::VerifyStack() { diff --git a/runtime/thread.cc b/runtime/thread.cc index 0ad01906e5..6d8ede5aa5 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -864,7 +864,8 @@ void Thread::DumpStack(std::ostream& os) const { // If we're currently in native code, dump that stack before dumping the managed stack. if (dump_for_abort || ShouldShowNativeStack(this)) { DumpKernelStack(os, GetTid(), " kernel: ", false); - DumpNativeStack(os, GetTid(), " native: ", false); + SirtRef<mirror::ArtMethod> method_ref(Thread::Current(), GetCurrentMethod(nullptr)); + DumpNativeStack(os, GetTid(), " native: ", false, method_ref.get()); } UniquePtr<Context> context(Context::Create()); StackDumpVisitor dumper(os, const_cast<Thread*>(this), context.get(), !throwing_OutOfMemoryError_); @@ -1196,16 +1197,18 @@ mirror::Object* Thread::DecodeJObject(jobject obj) const { // The "kinds" below are sorted by the frequency we expect to encounter them. if (kind == kLocal) { IndirectReferenceTable& locals = jni_env_->locals; - result = const_cast<mirror::Object*>(locals.Get(ref)); + result = locals.Get(ref); } else if (kind == kSirtOrInvalid) { // TODO: make stack indirect reference table lookup more efficient. // Check if this is a local reference in the SIRT. 
if (LIKELY(SirtContains(obj))) { // Read from SIRT. result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr(); + VerifyObject(result); } else if (Runtime::Current()->GetJavaVM()->work_around_app_jni_bugs) { // Assume an invalid local reference is actually a direct pointer. result = reinterpret_cast<mirror::Object*>(obj); + VerifyObject(result); } else { result = kInvalidIndirectRefObject; } @@ -1225,10 +1228,6 @@ mirror::Object* Thread::DecodeJObject(jobject obj) const { if (UNLIKELY(result == nullptr)) { JniAbortF(nullptr, "use of deleted %s %p", ToStr<IndirectRefKind>(kind).c_str(), obj); - } else { - if (result != kInvalidIndirectRefObject) { - VerifyObject(result); - } } return result; } diff --git a/runtime/thread.h b/runtime/thread.h index c7ab735465..2ebc107942 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -24,13 +24,13 @@ #include <string> #include "base/macros.h" +#include "base/mutex.h" #include "entrypoints/interpreter/interpreter_entrypoints.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/portable/portable_entrypoints.h" #include "entrypoints/quick/quick_entrypoints.h" #include "globals.h" #include "jvalue.h" -#include "locks.h" #include "object_callbacks.h" #include "offsets.h" #include "runtime_stats.h" diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index d311945180..bddebbd5e7 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -151,7 +151,8 @@ void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread #if HAVE_TIMED_RWLOCK // Attempt to rectify locks so that we dump thread list with required locks before exiting. -static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS { +static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn)); +static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) { Runtime* runtime = Runtime::Current(); std::ostringstream ss; ss << "Thread suspend timeout\n"; @@ -159,6 +160,7 @@ static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAF ss << "\n"; runtime->GetThreadList()->DumpLocked(ss); LOG(FATAL) << ss.str(); + exit(0); } #endif @@ -193,10 +195,10 @@ static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* t size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) { Thread* self = Thread::Current(); - if (kIsDebugBuild) { - Locks::mutator_lock_->AssertNotExclusiveHeld(self); - Locks::thread_list_lock_->AssertNotHeld(self); - Locks::thread_suspend_count_lock_->AssertNotHeld(self); + Locks::mutator_lock_->AssertNotExclusiveHeld(self); + Locks::thread_list_lock_->AssertNotHeld(self); + Locks::thread_suspend_count_lock_->AssertNotHeld(self); + if (kDebugLocking) { CHECK_NE(self->GetState(), kRunnable); } @@ -273,41 +275,41 @@ void ThreadList::SuspendAll() { VLOG(threads) << *self << " SuspendAll starting..."; - if (kIsDebugBuild) { - Locks::mutator_lock_->AssertNotHeld(self); - Locks::thread_list_lock_->AssertNotHeld(self); - Locks::thread_suspend_count_lock_->AssertNotHeld(self); + Locks::mutator_lock_->AssertNotHeld(self); + Locks::thread_list_lock_->AssertNotHeld(self); + Locks::thread_suspend_count_lock_->AssertNotHeld(self); + if (kDebugLocking) { CHECK_NE(self->GetState(), kRunnable); } { MutexLock mu(self, *Locks::thread_list_lock_); - { - MutexLock mu2(self, *Locks::thread_suspend_count_lock_); - // Update global suspend all state for attaching threads. 
- ++suspend_all_count_; - // Increment everybody's suspend count (except our own). - for (const auto& thread : list_) { - if (thread == self) { - continue; - } - VLOG(threads) << "requesting thread suspend: " << *thread; - thread->ModifySuspendCount(self, +1, false); + MutexLock mu2(self, *Locks::thread_suspend_count_lock_); + // Update global suspend all state for attaching threads. + ++suspend_all_count_; + // Increment everybody's suspend count (except our own). + for (const auto& thread : list_) { + if (thread == self) { + continue; } + VLOG(threads) << "requesting thread suspend: " << *thread; + thread->ModifySuspendCount(self, +1, false); } } // Block on the mutator lock until all Runnable threads release their share of access. #if HAVE_TIMED_RWLOCK // Timeout if we wait more than 30 seconds. - if (UNLIKELY(!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0))) { + if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) { UnsafeLogFatalForThreadSuspendAllTimeout(self); } #else Locks::mutator_lock_->ExclusiveLock(self); #endif - // Debug check that all threads are suspended. - AssertThreadsAreSuspended(self, self); + if (kDebugLocking) { + // Debug check that all threads are suspended. + AssertThreadsAreSuspended(self, self); + } VLOG(threads) << *self << " SuspendAll complete"; } @@ -317,8 +319,10 @@ void ThreadList::ResumeAll() { VLOG(threads) << *self << " ResumeAll starting"; - // Debug check that all threads are suspended. - AssertThreadsAreSuspended(self, self); + if (kDebugLocking) { + // Debug check that all threads are suspended. + AssertThreadsAreSuspended(self, self); + } Locks::mutator_lock_->ExclusiveUnlock(self); { diff --git a/runtime/thread_list.h b/runtime/thread_list.h index e98aed9c5d..1a76705f76 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -86,7 +86,7 @@ class ThreadList { // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside // of the suspend check. Returns how many checkpoints we should expect to run. 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index e98aed9c5d..1a76705f76 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -86,7 +86,7 @@ class ThreadList {
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
   // of the suspend check. Returns how many checkpoints we should expect to run.
-  size_t RunCheckpoint(Closure* checkpoint_function);
+  size_t RunCheckpoint(Closure* checkpoint_function) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_);
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index e8f9afe62d..b8735a3136 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -23,7 +23,6 @@
 #include "barrier.h"
 #include "base/mutex.h"
 #include "closure.h"
-#include "locks.h"
 #include "mem_map.h"
 
 namespace art {
diff --git a/runtime/throw_location.h b/runtime/throw_location.h
index f30aa4ea1e..c171b0783a 100644
--- a/runtime/throw_location.h
+++ b/runtime/throw_location.h
@@ -19,6 +19,7 @@
 
 #include "object_callbacks.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 
 #include <stdint.h>
 #include <string>
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 68f95402c6..cf696de32b 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -19,10 +19,9 @@
 
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
 #include "primitive.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <list>
diff --git a/runtime/utf.h b/runtime/utf.h
index 5b2289ef19..29f84997e6 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_UTF_H_
 
 #include "base/macros.h"
+#include "base/mutex.h"
 
 #include <stddef.h>
 #include <stdint.h>
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 237d217f0b..d2d23e8de6 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -38,6 +38,7 @@
 #include "mirror/string.h"
 #include "object_utils.h"
 #include "os.h"
+#include "scoped_thread_state_change.h"
 #include "utf-inl.h"
 
 #if !defined(HAVE_POSIX_CLOCKS)
@@ -1052,7 +1053,12 @@ static std::string CleanMapName(const backtrace_map_t* map) {
   return map->name.substr(last_slash + 1);
 }
 
-void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
+void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count,
+                     mirror::ArtMethod* current_method) {
+  // We may be called from contexts where current_method is not null, so we must assert this.
+  if (current_method != nullptr) {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
   UniquePtr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
   if (!backtrace->Unwind(0)) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
@@ -1073,7 +1079,11 @@ void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix, bool inclu
     if (!it->func_name.empty()) {
       os << it->func_name;
     } else {
-      os << "???";
+      if (current_method != nullptr && current_method->IsWithinQuickCode(it->pc)) {
+        os << JniLongName(current_method) << "+" << (it->pc - current_method->GetQuickOatCodeOffset());
+      } else {
+        os << "???";
+      }
     }
     if (it->func_offset != 0) {
       os << "+" << it->func_offset;
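The LOCKS_EXCLUDED annotation added to RunCheckpoint in thread_list.h above, like the Assert*Held() calls in these hunks, feeds Clang's thread-safety analysis (-Wthread-safety). A small self-contained sketch, using a hypothetical Mutex wrapper rather than ART's, of how the attribute lets the compiler flag a caller that already holds the excluded lock:

    // Compile with: clang++ -std=c++11 -Wthread-safety -c example.cc
    #include <mutex>

    #define CAPABILITY(x)        __attribute__((capability(x)))
    #define ACQUIRE(...)         __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)         __attribute__((release_capability(__VA_ARGS__)))
    #define LOCKS_EXCLUDED(...)  __attribute__((locks_excluded(__VA_ARGS__)))

    class CAPABILITY("mutex") Mutex {
     public:
      void Lock() ACQUIRE() { mu_.lock(); }
      void Unlock() RELEASE() { mu_.unlock(); }
     private:
      std::mutex mu_;
    };

    Mutex thread_list_lock;

    // Documents that callers must NOT hold thread_list_lock: the function takes it itself.
    void RunCheckpoint() LOCKS_EXCLUDED(thread_list_lock) {
      thread_list_lock.Lock();
      // ... visit each thread ...
      thread_list_lock.Unlock();
    }

    void BadCaller() {
      thread_list_lock.Lock();
      RunCheckpoint();  // warning: cannot call 'RunCheckpoint' while 'thread_list_lock' is held
      thread_list_lock.Unlock();
    }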
diff --git a/runtime/utils.h b/runtime/utils.h
index bcbeb0ea63..dbc3ab7634 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -373,7 +373,9 @@ std::string GetSchedulerGroupName(pid_t tid);
 void SetThreadName(const char* thread_name);
 
 // Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix = "", bool include_count = true);
+void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix = "",
+                     bool include_count = true, mirror::ArtMethod* current_method = nullptr)
+    NO_THREAD_SAFETY_ANALYSIS;
 
 // Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
 void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix = "", bool include_count = true);
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index 74c3e33531..c5543940e8 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -21,6 +21,7 @@
 #include "method_verifier.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
+#include "sirt_ref-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 555714f8f5..c4c3082918 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "method_verifier.h"
+#include "method_verifier-inl.h"
 
 #include <iostream>
 
@@ -40,6 +40,7 @@
 #include "register_line-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "sirt_ref-inl.h"
 #include "verifier/dex_gc_map.h"
 
 namespace art {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 031cfec3b1..5f13191bbe 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,12 +33,12 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
-#include "sirt_ref.h"
 #include "UniquePtr.h"
 
 namespace art {
 
 struct ReferenceMap2Visitor;
+template<class T> class SirtRef;
 
 namespace verifier {
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index b39df4a374..6640e0dd4a 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -17,10 +17,10 @@
 #ifndef ART_RUNTIME_VERIFY_OBJECT_H_
 #define ART_RUNTIME_VERIFY_OBJECT_H_
 
-#include "locks.h"
-
 #include <stdint.h>
 
+#include "base/macros.h"
+
 namespace art {
 
 namespace mirror {
@@ -52,10 +52,10 @@ static constexpr VerifyObjectFlags kDefaultVerifyFlags = kVerifyNone;
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
 
-ALWAYS_INLINE inline void VerifyObject(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
+void VerifyObject(mirror::Object* obj) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
 
 // Check that c.getClass() == c.getClass().getClass().
-ALWAYS_INLINE inline bool VerifyClassClass(mirror::Class* c) NO_THREAD_SAFETY_ANALYSIS;
+bool VerifyClassClass(mirror::Class* c) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS;
 
 } // namespace art
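The method_verifier.h hunk above replaces the sirt_ref.h include with a forward declaration, since the header only needs the name SirtRef<T>, not its definition; method_verifier.cc, which does use the type, now includes sirt_ref-inl.h instead. A minimal sketch of this include-trimming pattern, with hypothetical file and type names:

    // widget.h -- only references to Handle<T> appear here, so a forward
    // declaration is enough and the heavier handle.h include can be dropped.
    #ifndef WIDGET_H_
    #define WIDGET_H_

    template<class T> class Handle;  // Mirrors "template<class T> class SirtRef;" above.
    class Thing;

    class Widget {
     public:
      // Only a reference to Handle<Thing> is named; its definition is not required
      // until the member function is actually defined.
      void Process(const Handle<Thing>& thing);
    };

    #endif  // WIDGET_H_

    // widget.cc -- the definition is needed here, so the full header is included
    // (much as method_verifier.cc now pulls in sirt_ref-inl.h):
    //   #include "handle.h"
    //   void Widget::Process(const Handle<Thing>& thing) { /* use thing */ }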