96 files changed, 2296 insertions, 1460 deletions
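The central refactoring in this diff folds the old LoadBaseDispVolatile()/StoreBaseDispVolatile() entry points into LoadBaseDisp()/StoreBaseDisp(), which now take a VolatileKind argument so each backend emits its own memory barriers. The standalone C++ sketch below is illustrative only and not part of the patch: the Trace/Emit helpers are invented stand-ins for LIR emission; only the VolatileKind enum and the barrier ordering (LoadLoad plus LoadStore after a volatile load, StoreStore before and StoreLoad after a volatile store) come from the changes shown further down.

// Illustrative sketch, not part of the patch: shows where the conservative
// barriers land once LoadBaseDisp()/StoreBaseDisp() take a VolatileKind.
#include <iostream>
#include <string>
#include <vector>

enum VolatileKind { kNotVolatile, kVolatile };

struct Trace {
  std::vector<std::string> ops;
  void Emit(const std::string& op) { ops.push_back(op); }  // stand-in for NewLIR*/GenMemBarrier
};

// Volatile load: issue the load, then both LoadLoad and LoadStore barriers,
// since without context-sensitive analysis either a load or a store may follow.
void LoadBaseDisp(Trace* t, int displacement, VolatileKind is_volatile) {
  t->Emit("ldr [base, #" + std::to_string(displacement) + "]");
  if (is_volatile == kVolatile) {
    t->Emit("dmb (LoadLoad)");
    t->Emit("dmb (LoadStore)");
  }
}

// Volatile store: StoreStore barrier before the store, StoreLoad barrier after it.
void StoreBaseDisp(Trace* t, int displacement, VolatileKind is_volatile) {
  if (is_volatile == kVolatile) {
    t->Emit("dmb (StoreStore)");
  }
  t->Emit("str [base, #" + std::to_string(displacement) + "]");
  if (is_volatile == kVolatile) {
    t->Emit("dmb (StoreLoad)");
  }
}

int main() {
  Trace t;
  LoadBaseDisp(&t, 8, kVolatile);
  StoreBaseDisp(&t, 8, kVolatile);
  for (const std::string& op : t.ops) {
    std::cout << op << "\n";
  }
  return 0;
}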
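The diff also retires the TimingLogger StartSplit()/NewSplit()/EndSplit() interface in favour of explicit StartTiming()/EndTiming() calls and a TimingLogger::ScopedTiming guard (see common_compiler_test.h and dex/frontend.cc below). A minimal RAII stand-in, assuming ScopedTiming simply brackets StartTiming()/EndTiming(), could look like this; MiniTimingLogger is a made-up class, not the real art::TimingLogger interface.

// Hedged sketch of the RAII timing pattern adopted by the patch; MiniTimingLogger
// is a simplified stand-in for art::TimingLogger.
#include <iostream>
#include <string>
#include <vector>

class MiniTimingLogger {
 public:
  void StartTiming(const std::string& label) { events_.push_back("start " + label); }
  void EndTiming() { events_.push_back("end"); }

  // Starts a timing on construction and ends it on destruction, replacing the
  // old manual StartSplit()/NewSplit()/EndSplit() bookkeeping.
  class ScopedTiming {
   public:
    ScopedTiming(const std::string& label, MiniTimingLogger* logger) : logger_(logger) {
      logger_->StartTiming(label);
    }
    ~ScopedTiming() { logger_->EndTiming(); }
   private:
    MiniTimingLogger* logger_;
  };

  void Dump() const {
    for (const std::string& e : events_) std::cout << e << "\n";
  }

 private:
  std::vector<std::string> events_;
};

int main() {
  MiniTimingLogger timings;
  {
    MiniTimingLogger::ScopedTiming t("CompileOne", &timings);
    // ... compile ...
  }
  {
    MiniTimingLogger::ScopedTiming t2("MakeExecutable", &timings);
    // ... make executable ...
  }
  timings.Dump();
  return 0;
}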
diff --git a/build/Android.common.mk b/build/Android.common.mk index 09f34b3092..f916e1ee6f 100644 --- a/build/Android.common.mk +++ b/build/Android.common.mk @@ -134,7 +134,7 @@ endif # Clang on the target: only enabled for ARM64. Target builds use GCC by default. ART_TARGET_CLANG := ART_TARGET_CLANG_arm := -ART_TARGET_CLANG_arm64 := true +ART_TARGET_CLANG_arm64 := ART_TARGET_CLANG_mips := ART_TARGET_CLANG_x86 := ART_TARGET_CLANG_x86_64 := diff --git a/build/Android.oat.mk b/build/Android.oat.mk index fbb7eb36c6..c67a815832 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -42,6 +42,11 @@ $(HOST_CORE_IMG_OUT): $(HOST_CORE_DEX_FILES) $(DEX2OATD_DEPENDENCY) $(HOST_CORE_OAT_OUT): $(HOST_CORE_IMG_OUT) +IMPLICIT_CHECKS_arm := null,stack +IMPLICIT_CHECKS_arm64 := none +IMPLICIT_CHECKS_x86 := none +IMPLICIT_CHECKS_x86_64 := none +IMPLICIT_CHECKS_mips := none define create-oat-target-targets $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY) @echo "target dex2oat: $$@ ($$?)" @@ -49,6 +54,7 @@ $$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENC $$(hide) $$(DEX2OATD) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$$(PRELOADED_CLASSES) $$(addprefix \ --dex-file=,$$(TARGET_CORE_DEX_FILES)) $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \ --oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) \ + --implicit-checks=$(IMPLICIT_CHECKS_$($(1)TARGET_ARCH)) \ --instruction-set=$$($(1)TARGET_ARCH) --instruction-set-features=$$(TARGET_INSTRUCTION_SET_FEATURES) --android-root=$$(PRODUCT_OUT)/system # This "renaming" eases declaration in art/Android.mk @@ -58,7 +64,7 @@ $$($(1)TARGET_CORE_OAT_OUT): $$($(1)TARGET_CORE_IMG_OUT) endef ifdef TARGET_2ND_ARCH -$(eval $(call create-oat-target-targets,2ND_)) + $(eval $(call create-oat-target-targets,2ND_)) endif $(eval $(call create-oat-target-targets,)) diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 5050d4eb78..45cf2fba7f 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -371,10 +371,10 @@ class CommonCompilerTest : public CommonRuntimeTest { void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { CHECK(method != nullptr); TimingLogger timings("CommonTest::CompileMethod", false, false); - timings.StartSplit("CompileOne"); + TimingLogger::ScopedTiming t(__FUNCTION__, &timings); compiler_driver_->CompileOne(method, &timings); + TimingLogger::ScopedTiming t2("MakeExecutable", &timings); MakeExecutable(method); - timings.EndSplit(); } void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name, diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 7441daccfe..f098a34ea7 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -86,7 +86,11 @@ uint32_t CompiledCode::AlignCode(uint32_t offset, InstructionSet instruction_set } size_t CompiledCode::CodeDelta() const { - switch (instruction_set_) { + return CodeDelta(instruction_set_); +} + +size_t CompiledCode::CodeDelta(InstructionSet instruction_set) { + switch (instruction_set) { case kArm: case kArm64: case kMips: @@ -98,7 +102,7 @@ size_t CompiledCode::CodeDelta() const { return 1; } default: - LOG(FATAL) << "Unknown InstructionSet: " << instruction_set_; + LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; return 0; } } diff --git 
a/compiler/compiled_method.h b/compiler/compiled_method.h index 23cd250678..b8cd851a1f 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -67,6 +67,7 @@ class CompiledCode { // returns the difference between the code address and a usable PC. // mainly to cope with kThumb2 where the lower bit must be set. size_t CodeDelta() const; + static size_t CodeDelta(InstructionSet instruction_set); // Returns a pointer suitable for invoking the code at the argument // code_pointer address. Mainly to cope with kThumb2 where the diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index de9ac4bd01..caecb7a48e 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -527,6 +527,13 @@ enum FixupKind { std::ostream& operator<<(std::ostream& os, const FixupKind& kind); +enum VolatileKind { + kNotVolatile, // Load/Store is not volatile + kVolatile // Load/Store is volatile +}; + +std::ostream& operator<<(std::ostream& os, const VolatileKind& kind); + } // namespace art #endif // ART_COMPILER_DEX_COMPILER_ENUMS_H_ diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 414d51412e..72990b4be0 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -114,19 +114,20 @@ CompilationUnit::~CompilationUnit() { void CompilationUnit::StartTimingSplit(const char* label) { if (compiler_driver->GetDumpPasses()) { - timings.StartSplit(label); + timings.StartTiming(label); } } void CompilationUnit::NewTimingSplit(const char* label) { if (compiler_driver->GetDumpPasses()) { - timings.NewSplit(label); + timings.EndTiming(); + timings.StartTiming(label); } } void CompilationUnit::EndTiming() { if (compiler_driver->GetDumpPasses()) { - timings.EndSplit(); + timings.EndTiming(); if (enable_debug & (1 << kDebugTimings)) { LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file); LOG(INFO) << Dumpable<TimingLogger>(timings); @@ -783,10 +784,11 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file, void* llvm_compilation_unit) { - VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "..."; + std::string method_name = PrettyMethod(method_idx, dex_file); + VLOG(compiler) << "Compiling " << method_name << "..."; if (code_item->insns_size_in_code_units_ >= 0x10000) { LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_ - << " in " << PrettyMethod(method_idx, dex_file); + << " in " << method_name; return NULL; } @@ -818,8 +820,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.compiler_flip_match = false; bool use_match = !cu.compiler_method_match.empty(); bool match = use_match && (cu.compiler_flip_match ^ - (PrettyMethod(method_idx, dex_file).find(cu.compiler_method_match) != - std::string::npos)); + (method_name.find(cu.compiler_method_match) != std::string::npos)); if (!use_match || match) { cu.disable_opt = kCompilerOptimizerDisableFlags; cu.enable_debug = kCompilerDebugFlags; @@ -830,7 +831,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, if (gVerboseMethods.size() != 0) { cu.verbose = false; for (size_t i = 0; i < gVerboseMethods.size(); ++i) { - if (PrettyMethod(method_idx, dex_file).find(gVerboseMethods[i]) + if (method_name.find(gVerboseMethods[i]) != std::string::npos) { cu.verbose = true; break; @@ -864,7 +865,9 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, (1 << kPromoteCompilerTemps)); } else if 
(cu.instruction_set == kX86_64) { // TODO(X86_64): enable optimizations once backend is mature enough. - cu.disable_opt = ~(uint32_t)0; + cu.disable_opt |= ( + (1 << kLoadStoreElimination) | + (1 << kPromoteRegs)); } else if (cu.instruction_set == kArm64) { // TODO(Arm64): enable optimizations once backend is mature enough. cu.disable_opt = ~(uint32_t)0; @@ -885,22 +888,13 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.mir_graph->EnableOpcodeCounting(); } - // Check early if we should skip this compilation if the profiler is enabled. - if (cu.compiler_driver->ProfilePresent()) { - std::string methodname = PrettyMethod(method_idx, dex_file); - if (cu.mir_graph->SkipCompilationByName(methodname)) { - return nullptr; - } - } - /* Build the raw MIR graph */ cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file); // TODO(Arm64): Remove this when we are able to compile everything. if (!CanCompileMethod(method_idx, dex_file, cu)) { - VLOG(compiler) << cu.instruction_set << ": Cannot compile method : " - << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << cu.instruction_set << ": Cannot compile method : " << method_name; return nullptr; } @@ -908,7 +902,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, std::string skip_message; if (cu.mir_graph->SkipCompilation(&skip_message)) { VLOG(compiler) << cu.instruction_set << ": Skipping method : " - << PrettyMethod(method_idx, dex_file) << " Reason = " << skip_message; + << method_name << " Reason = " << skip_message; return nullptr; } @@ -916,6 +910,13 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, PassDriverMEOpts pass_driver(&cu); pass_driver.Launch(); + /* For non-leaf methods check if we should skip compilation when the profiler is enabled. 
*/ + if (cu.compiler_driver->ProfilePresent() + && !cu.mir_graph->MethodIsLeaf() + && cu.mir_graph->SkipCompilationByName(method_name)) { + return nullptr; + } + if (cu.enable_debug & (1 << kDebugDumpCheckStats)) { cu.mir_graph->DumpCheckStats(); } @@ -931,7 +932,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) { if (cu.arena_stack.PeakBytesAllocated() > 256 * 1024) { MemStats stack_stats(cu.arena_stack.GetPeakStats()); - LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats); + LOG(INFO) << method_name << " " << Dumpable<MemStats>(stack_stats); } } cu.arena_stack.Reset(); @@ -939,8 +940,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, CompiledMethod* result = NULL; if (cu.mir_graph->PuntToInterpreter()) { - VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: " - << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: " << method_name; return nullptr; } @@ -951,21 +951,21 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.NewTimingSplit("Cleanup"); if (result) { - VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << cu.instruction_set << ": Compiled " << method_name; } else { - VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << cu.instruction_set << ": Deferred " << method_name; } if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) { if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) { MemStats mem_stats(cu.arena.GetMemStats()); - LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); + LOG(INFO) << method_name << " " << Dumpable<MemStats>(mem_stats); } } if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) { LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks() - << " " << PrettyMethod(method_idx, dex_file); + << " " << method_name; } cu.EndTiming(); diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 590c7674f6..04d6898e36 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -316,9 +316,9 @@ void ArmMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); RegStorage reset_reg = AllocTempRef(); - LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg); + LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg, kNotVolatile); LoadConstant(reset_reg, 0); - StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg, kNotVolatile); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 44998627ca..70dce7f11e 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -33,20 +33,16 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* CheckSuspendUsingLoad() OVERRIDE; RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; - LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* 
LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadConstantNoClobber(RegStorage r_dest, int value); LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 916c52838a..e34d944ab2 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -723,7 +723,7 @@ bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); StoreValue(rl_dest, rl_result); } return true; @@ -737,13 +737,13 @@ bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { if (size == k64) { // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32); - StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile); + StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); } return true; } @@ -1230,7 +1230,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } FreeTemp(reg_len); } - LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size); + LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile); MarkPossibleNullPointerException(opt_flags); if (!constant_index) { FreeTemp(reg_ptr); @@ -1330,7 +1330,7 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, FreeTemp(reg_len); } - StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size); + StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile); MarkPossibleNullPointerException(opt_flags); } else { /* reg_ptr -> array data */ diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index b236f99311..bc8f95b1ca 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -961,31 +961,37 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag return load; } -LIR* ArmMir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { - // Only 64-bit load needs special handling. 
- if (UNLIKELY(size == k64 || size == kDouble)) { - DCHECK(!r_dest.IsFloat()); // See RegClassForFieldLoadSave(). - // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDisp(). - if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) { - // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.) - RegStorage r_ptr = AllocTemp(); - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement); - LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg()); - FreeTemp(r_ptr); - return lir; - } - } - return LoadBaseDisp(r_base, displacement, r_dest, size); -} - LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { + OpSize size, VolatileKind is_volatile) { // TODO: base this on target. if (size == kWord) { size = k32; } - return LoadBaseDispBody(r_base, displacement, r_dest, size); + LIR* load; + if (UNLIKELY(is_volatile == kVolatile && + (size == k64 || size == kDouble) && + !cu_->compiler_driver->GetInstructionSetFeatures().HasLpae())) { + // Only 64-bit load needs special handling. + // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDisp(). + DCHECK(!r_dest.IsFloat()); // See RegClassForFieldLoadSave(). + // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.) + RegStorage r_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, r_ptr, r_base, displacement); + LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg()); + FreeTemp(r_ptr); + return lir; + } else { + load = LoadBaseDispBody(r_base, displacement, r_dest, size); + } + + if (UNLIKELY(is_volatile == kVolatile)) { + // Without context sensitive analysis, we must issue the most conservative barriers. + // In this case, either a load or store may follow so we issue both barriers. + GenMemBarrier(kLoadLoad); + GenMemBarrier(kLoadStore); + } + + return load; } @@ -1081,49 +1087,58 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora return store; } -LIR* ArmMir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { - // Only 64-bit store needs special handling. - if (UNLIKELY(size == k64 || size == kDouble)) { - DCHECK(!r_src.IsFloat()); // See RegClassForFieldLoadSave(). +LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size, VolatileKind is_volatile) { + if (UNLIKELY(is_volatile == kVolatile)) { + // There might have been a store before this volatile one so insert StoreStore barrier. + GenMemBarrier(kStoreStore); + } + + LIR* store; + if (UNLIKELY(is_volatile == kVolatile && + (size == k64 || size == kDouble) && + !cu_->compiler_driver->GetInstructionSetFeatures().HasLpae())) { + // Only 64-bit store needs special handling. // If the cpu supports LPAE, aligned STRD is atomic - fall through to StoreBaseDisp(). - if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) { - // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.) - RegStorage r_ptr = AllocTemp(); + // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.) + DCHECK(!r_src.IsFloat()); // See RegClassForFieldLoadSave(). 
+ RegStorage r_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, r_ptr, r_base, displacement); + LIR* fail_target = NewLIR0(kPseudoTargetLabel); + // We have only 5 temporary registers available and if r_base, r_src and r_ptr already + // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr + // in LDREXD and recalculate it from r_base. + RegStorage r_temp = AllocTemp(); + RegStorage r_temp_high = AllocFreeTemp(); // We may not have another temp. + if (r_temp_high.Valid()) { + NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg()); + FreeTemp(r_temp_high); + FreeTemp(r_temp); + } else { + // If we don't have another temp, clobber r_ptr in LDREXD and reload it. + NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg()); + FreeTemp(r_temp); // May need the temp for kOpAdd. OpRegRegImm(kOpAdd, r_ptr, r_base, displacement); - LIR* fail_target = NewLIR0(kPseudoTargetLabel); - // We have only 5 temporary registers available and if r_base, r_src and r_ptr already - // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr - // in LDREXD and recalculate it from r_base. - RegStorage r_temp = AllocTemp(); - RegStorage r_temp_high = AllocFreeTemp(); // We may not have another temp. - if (r_temp_high.Valid()) { - NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg()); - FreeTemp(r_temp_high); - FreeTemp(r_temp); - } else { - // If we don't have another temp, clobber r_ptr in LDREXD and reload it. - NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg()); - FreeTemp(r_temp); // May need the temp for kOpAdd. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement); - } - LIR* lir = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), - r_ptr.GetReg()); - OpCmpImmBranch(kCondNe, r_temp, 0, fail_target); - FreeTemp(r_ptr); - return lir; } + store = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), + r_ptr.GetReg()); + OpCmpImmBranch(kCondNe, r_temp, 0, fail_target); + FreeTemp(r_ptr); + } else { + // TODO: base this on target. + if (size == kWord) { + size = k32; + } + + store = StoreBaseDispBody(r_base, displacement, r_src, size); } - return StoreBaseDisp(r_base, displacement, r_src, size); -} -LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { - // TODO: base this on target. - if (size == kWord) { - size = k32; + if (UNLIKELY(is_volatile == kVolatile)) { + // A load might follow the volatile store so insert a StoreLoad barrier. + GenMemBarrier(kStoreLoad); } - return StoreBaseDispBody(r_base, displacement, r_src, size); + + return store; } LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 3f32c5194e..1f1a252343 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -101,6 +101,7 @@ namespace art { // Temporary macros, used to mark code which wants to distinguish betweek zr/sp. 
#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp) #define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr) +#define A64_REGSTORAGE_IS_SP_OR_ZR(rs) (((rs).GetRegNum() & 0x1f) == 0x1f) enum Arm64ResourceEncodingPos { kArm64GPReg0 = 0, diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 2a8da24982..bee64f1d42 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -632,19 +632,19 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { if (static_cast<unsigned>(kind) < kFmtBitBlt) { bool is_zero = A64_REG_IS_ZR(operand); - if (kIsDebugBuild) { + if (kIsDebugBuild && (kFailOnSizeError || kReportSizeError)) { // Register usage checks: First establish register usage requirements based on the // format in `kind'. bool want_float = false; bool want_64_bit = false; - bool want_size_match = false; + bool want_var_size = true; bool want_zero = false; switch (kind) { case kFmtRegX: want_64_bit = true; // Intentional fall-through. case kFmtRegW: - want_size_match = true; + want_var_size = false; // Intentional fall-through. case kFmtRegR: want_zero = true; @@ -653,7 +653,7 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { want_64_bit = true; // Intentional fall-through. case kFmtRegWOrSp: - want_size_match = true; + want_var_size = false; break; case kFmtRegROrSp: break; @@ -661,7 +661,7 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { want_64_bit = true; // Intentional fall-through. case kFmtRegS: - want_size_match = true; + want_var_size = false; // Intentional fall-through. case kFmtRegF: want_float = true; @@ -672,21 +672,27 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { break; } + // want_var_size == true means kind == kFmtReg{R,F}. In these two cases, we want + // the register size to be coherent with the instruction width. + if (want_var_size) { + want_64_bit = opcode_is_wide; + } + // Now check that the requirements are satisfied. RegStorage reg(operand | RegStorage::kValid); const char *expected = nullptr; if (want_float) { if (!reg.IsFloat()) { expected = "float register"; - } else if (want_size_match && (reg.IsDouble() != want_64_bit)) { + } else if (reg.IsDouble() != want_64_bit) { expected = (want_64_bit) ? "double register" : "single register"; } } else { if (reg.IsFloat()) { expected = "core register"; - } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) { + } else if (reg.Is64Bit() != want_64_bit) { expected = (want_64_bit) ? "x-register" : "w-register"; - } else if (reg.GetRegNum() == 31 && is_zero != want_zero) { + } else if (A64_REGSTORAGE_IS_SP_OR_ZR(reg) && is_zero != want_zero) { expected = (want_zero) ? "zero-register" : "sp-register"; } } @@ -698,8 +704,13 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { if (expected != nullptr) { LOG(WARNING) << "Method: " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " @ 0x" << std::hex << lir->dalvik_offset; - LOG(FATAL) << "Bad argument n. " << i << " of " << encoder->name - << ". Expected " << expected << ", got 0x" << std::hex << operand; + if (kFailOnSizeError) { + LOG(FATAL) << "Bad argument n. " << i << " of " << encoder->name + << ". Expected " << expected << ", got 0x" << std::hex << operand; + } else { + LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name + << ". 
Expected " << expected << ", got 0x" << std::hex << operand; + } } } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 1df576b743..c3f4711546 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -267,7 +267,7 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { MarkPossibleNullPointerException(opt_flags); LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL); GenMemBarrier(kStoreLoad); - Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr); + Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr); LIR* unlock_success_branch = OpUnconditionalBranch(NULL); LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); @@ -289,8 +289,8 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<8>().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg); - StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr); + LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg, kNotVolatile); + StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr, kNotVolatile); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index f1270eca01..68fa6f40cb 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -26,6 +26,11 @@ namespace art { class Arm64Mir2Lir : public Mir2Lir { protected: + // If we detect a size error, FATAL out. + static constexpr bool kFailOnSizeError = false && kIsDebugBuild; + // If we detect a size error, report to LOG. + static constexpr bool kReportSizeError = false && kIsDebugBuild; + // TODO: consolidate 64-bit target support. 
class InToRegStorageMapper { public: @@ -69,22 +74,25 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* CheckSuspendUsingLoad() OVERRIDE; RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; - LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; + LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest, + VolatileKind is_volatile) + OVERRIDE; LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; + LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) OVERRIDE; LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadConstantNoClobber(RegStorage r_dest, int value); LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; + LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, + VolatileKind is_volatile) OVERRIDE; LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) OVERRIDE; + LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_src, OpSize size) OVERRIDE; void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE; @@ -283,8 +291,15 @@ class Arm64Mir2Lir : public Mir2Lir { * @see As64BitReg */ RegStorage As32BitReg(RegStorage reg) { - DCHECK(reg.Is64Bit()); DCHECK(!reg.IsPair()); + if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Expected 64b register"; + } else { + LOG(WARNING) << "Expected 64b register"; + return reg; + } + } RegStorage ret_val = RegStorage(RegStorage::k32BitSolo, reg.GetRawBits() & RegStorage::kRegTypeMask); DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask) @@ -293,6 +308,18 @@ class Arm64Mir2Lir : public Mir2Lir { return ret_val; } + RegStorage Check32BitReg(RegStorage reg) { + if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Checked for 32b register"; + } else { + LOG(WARNING) << "Checked for 32b register"; + return As32BitReg(reg); + } + } + return reg; + } + /** * @brief Given register wNN (sNN), returns register xNN (dNN). * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2). 
@@ -300,8 +327,15 @@ class Arm64Mir2Lir : public Mir2Lir { * @see As32BitReg */ RegStorage As64BitReg(RegStorage reg) { - DCHECK(reg.Is32Bit()); DCHECK(!reg.IsPair()); + if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Expected 32b register"; + } else { + LOG(WARNING) << "Expected 32b register"; + return reg; + } + } RegStorage ret_val = RegStorage(RegStorage::k64BitSolo, reg.GetRawBits() & RegStorage::kRegTypeMask); DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask) @@ -310,6 +344,18 @@ class Arm64Mir2Lir : public Mir2Lir { return ret_val; } + RegStorage Check64BitReg(RegStorage reg) { + if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Checked for 64b register"; + } else { + LOG(WARNING) << "Checked for 64b register"; + return As64BitReg(reg); + } + } + return reg; + } + LIR* LoadFPConstantValue(RegStorage r_dest, int32_t value); LIR* LoadFPConstantValueWide(RegStorage r_dest, int64_t value); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 2ac4adbadc..1fdbe2dfba 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -410,7 +410,7 @@ bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); if (size == k64) { StoreValueWide(rl_dest, rl_result); } else { @@ -433,7 +433,7 @@ bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); rl_value = LoadValue(rl_src_value, kCoreReg); } - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); return true; } @@ -747,7 +747,11 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } FreeTemp(reg_len); } - LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size); + if (rl_result.ref) { + LoadRefDisp(reg_ptr, data_offset, rl_result.reg, kNotVolatile); + } else { + LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile); + } MarkPossibleNullPointerException(opt_flags); if (!constant_index) { FreeTemp(reg_ptr); @@ -768,7 +772,11 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } - LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size); + if (rl_result.ref) { + LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg); + } else { + LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size); + } MarkPossibleNullPointerException(opt_flags); FreeTemp(reg_ptr); StoreValue(rl_dest, rl_result); @@ -847,8 +855,11 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } FreeTemp(reg_len); } - - StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size); + if (rl_src.ref) { + StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile); + } else { + StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile); + } MarkPossibleNullPointerException(opt_flags); } else { /* reg_ptr -> array data */ @@ -858,7 +869,11 @@ void Arm64Mir2Lir::GenArrayPut(int 
opt_flags, OpSize size, RegLocation rl_array, GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } - StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size); + if (rl_src.ref) { + StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg); + } else { + StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size); + } MarkPossibleNullPointerException(opt_flags); } if (allocated_reg_ptr_temp) { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 06e1cda305..dfaa4837d2 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -789,7 +789,7 @@ RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) { RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<8> offset) { // TODO(Arm64): use LoadWordDisp instead. // e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR); - LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64); + LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64, kNotVolatile); return rs_rA64_LR; } @@ -949,7 +949,7 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile); } if (cu_->num_ins == 0) { @@ -971,7 +971,7 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); } else { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size); + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile); if (reg.Is64Bit()) { if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; @@ -1057,14 +1057,14 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, loc = UpdateLocWide(loc); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); } next_arg += 2; } else { loc = UpdateLoc(loc); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); } next_arg++; } @@ -1122,18 +1122,27 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_arg.wide) { if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); } else { LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile); } i++; } else { if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + if (rl_arg.ref) { + StoreRefDisp(TargetReg(kSp), out_offset, rl_arg.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); + } } else { 
- LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + if (rl_arg.ref) { + LoadValueDirectFixed(rl_arg, regSingle); + StoreRefDisp(TargetReg(kSp), out_offset, regSingle, kNotVolatile); + } else { + LoadValueDirectFixed(rl_arg, As32BitReg(regSingle)); + StoreBaseDisp(TargetReg(kSp), out_offset, As32BitReg(regSingle), k32, kNotVolatile); + } } } } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 672aa8884f..12c2f415d6 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -893,9 +893,7 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto ArmOpcode opcode = kA64Brk1d; DCHECK(r_base.Is64Bit()); // TODO: need a cleaner handling of index registers here and throughout. - if (r_index.Is32Bit()) { - r_index = As64BitReg(r_index); - } + r_index = Check32BitReg(r_index); if (r_dest.IsFloat()) { if (r_dest.IsDouble()) { @@ -918,12 +916,14 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto case kDouble: case kWord: case k64: + r_dest = Check64BitReg(r_dest); opcode = WIDE(kA64Ldr4rXxG); expected_scale = 3; break; case kSingle: case k32: case kReference: + r_dest = Check32BitReg(r_dest); opcode = kA64Ldr4rXxG; expected_scale = 2; break; @@ -959,6 +959,10 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto return load; } +LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) { + return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), 2, kReference); +} + LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) { LIR* store; @@ -966,9 +970,7 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt ArmOpcode opcode = kA64Brk1d; DCHECK(r_base.Is64Bit()); // TODO: need a cleaner handling of index registers here and throughout. - if (r_index.Is32Bit()) { - r_index = As64BitReg(r_index); - } + r_index = Check32BitReg(r_index); if (r_src.IsFloat()) { if (r_src.IsDouble()) { @@ -991,12 +993,14 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt case kDouble: // Intentional fall-trough. case kWord: // Intentional fall-trough. case k64: + r_src = Check64BitReg(r_src); opcode = WIDE(kA64Str4rXxG); expected_scale = 3; break; case kSingle: // Intentional fall-trough. case k32: // Intentional fall-trough. case kReference: + r_src = Check32BitReg(r_src); opcode = kA64Str4rXxG; expected_scale = 2; break; @@ -1026,6 +1030,10 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt return store; } +LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) { + return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), 2, kReference); +} + /* * Load value from base + displacement. Optionally perform null check * on base (which must have an associated s_reg and MIR). If not @@ -1042,6 +1050,7 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor case kDouble: // Intentional fall-through. case kWord: // Intentional fall-through. case k64: + r_dest = Check64BitReg(r_dest); scale = 3; if (r_dest.IsFloat()) { DCHECK(r_dest.IsDouble()); @@ -1055,6 +1064,7 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor case kSingle: // Intentional fall-through. case k32: // Intentional fall-trough. 
case kReference: + r_dest = Check32BitReg(r_dest); scale = 2; if (r_dest.IsFloat()) { DCHECK(r_dest.IsSingle()); @@ -1106,18 +1116,27 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor return load; } -LIR* Arm64Mir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { +LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size, VolatileKind is_volatile) { // LoadBaseDisp() will emit correct insn for atomic load on arm64 // assuming r_dest is correctly prepared using RegClassForFieldLoadStore(). - return LoadBaseDisp(r_base, displacement, r_dest, size); -} -LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { - return LoadBaseDispBody(r_base, displacement, r_dest, size); + LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size); + + if (UNLIKELY(is_volatile == kVolatile)) { + // Without context sensitive analysis, we must issue the most conservative barriers. + // In this case, either a load or store may follow so we issue both barriers. + GenMemBarrier(kLoadLoad); + GenMemBarrier(kLoadStore); + } + + return load; } +LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest, + VolatileKind is_volatile) { + return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile); +} LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size) { @@ -1130,6 +1149,7 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto case kDouble: // Intentional fall-through. case kWord: // Intentional fall-through. case k64: + r_src = Check64BitReg(r_src); scale = 3; if (r_src.IsFloat()) { DCHECK(r_src.IsDouble()); @@ -1143,6 +1163,7 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto case kSingle: // Intentional fall-through. case k32: // Intentional fall-trough. case kReference: + r_src = Check32BitReg(r_src); scale = 2; if (r_src.IsFloat()) { DCHECK(r_src.IsSingle()); @@ -1188,16 +1209,29 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto return store; } -LIR* Arm64Mir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { +LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size, VolatileKind is_volatile) { + if (UNLIKELY(is_volatile == kVolatile)) { + // There might have been a store before this volatile one so insert StoreStore barrier. + GenMemBarrier(kStoreStore); + } + // StoreBaseDisp() will emit correct insn for atomic store on arm64 // assuming r_dest is correctly prepared using RegClassForFieldLoadStore(). - return StoreBaseDisp(r_base, displacement, r_src, size); + + LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size); + + if (UNLIKELY(is_volatile == kVolatile)) { + // A load might follow the volatile store so insert a StoreLoad barrier. 
+ GenMemBarrier(kStoreLoad); + } + + return store; } -LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { - return StoreBaseDispBody(r_base, displacement, r_src, size); +LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, + VolatileKind is_volatile) { + return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile); } LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index ec0fb43571..f31b670164 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -79,6 +79,20 @@ void Mir2Lir::MarkSafepointPC(LIR* inst) { DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll)); } +void Mir2Lir::MarkSafepointPCAfter(LIR* after) { + DCHECK(!after->flags.use_def_invalid); + after->u.m.def_mask = &kEncodeAll; + // As NewLIR0 uses Append, we need to create the LIR by hand. + LIR* safepoint_pc = RawLIR(current_dalvik_offset_, kPseudoSafepointPC); + if (after->next == nullptr) { + DCHECK_EQ(after, last_lir_insn_); + AppendLIR(safepoint_pc); + } else { + InsertLIRAfter(after, safepoint_pc); + } + DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll)); +} + /* Remove a LIR from the list. */ void Mir2Lir::UnlinkLIR(LIR* lir) { if (UNLIKELY(lir == first_lir_insn_)) { @@ -1112,7 +1126,7 @@ void Mir2Lir::InsertLIRBefore(LIR* current_lir, LIR* new_lir) { /* * Insert an LIR instruction after the current instruction, which cannot be the - * first instruction. + * last instruction. * * current_lir -> new_lir -> old_next */ diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index e36b592c74..b00cbeb61d 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -196,6 +196,15 @@ void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) { } } +void Mir2Lir::MarkPossibleNullPointerExceptionAfter(int opt_flags, LIR* after) { + if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) { + if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { + return; + } + MarkSafepointPCAfter(after); + } +} + void Mir2Lir::MarkPossibleStackOverflowException() { if (!cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) { MarkSafepointPC(last_lir_insn_); @@ -506,7 +515,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { for (int i = 0; i < elems; i++) { RegLocation rl_arg = LoadValue(info->args[i], kCoreReg); Store32Disp(TargetReg(kRet0), - mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg); + mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg); // If the LoadValue caused a temp to be allocated, free it if (IsTemp(rl_arg.reg)) { FreeTemp(rl_arg.reg); @@ -575,7 +584,8 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, // Fast path, static storage base is this method's class RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + kNotVolatile); if (IsTemp(rl_method.reg)) { FreeTemp(rl_method.reg); } @@ -592,9 +602,10 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, LoadCurrMethodDirect(r_method); r_base = TargetReg(kArg0); LockTemp(r_base); - 
LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. if (!field_info.IsInitialized() && (mir->optimization_flags & MIR_IGNORE_CLINIT_CHECK) == 0) { @@ -626,14 +637,12 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, } else { rl_src = LoadValue(rl_src, reg_class); } - if (field_info.IsVolatile()) { - // There might have been a store before this volatile one so insert StoreStore barrier. - GenMemBarrier(kStoreStore); - StoreBaseDispVolatile(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size); - // A load might follow the volatile store so insert a StoreLoad barrier. - GenMemBarrier(kStoreLoad); + if (is_object) { + StoreRefDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, + field_info.IsVolatile() ? kVolatile : kNotVolatile); } else { - StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size); + StoreBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg, store_size, + field_info.IsVolatile() ? kVolatile : kNotVolatile); } if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) { MarkGCCard(rl_src.reg, r_base); @@ -672,7 +681,8 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, // Fast path, static storage base is this method's class RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + kNotVolatile); } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized @@ -685,9 +695,10 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, LoadCurrMethodDirect(r_method); r_base = TargetReg(kArg0); LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. if (!field_info.IsInitialized() && (mir->optimization_flags & MIR_IGNORE_CLINIT_CHECK) == 0) { @@ -717,14 +728,12 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); int field_offset = field_info.FieldOffset().Int32Value(); - if (field_info.IsVolatile()) { - LoadBaseDispVolatile(r_base, field_offset, rl_result.reg, load_size); - // Without context sensitive analysis, we must issue the most conservative barriers. - // In this case, either a load or store may follow so we issue both barriers. - GenMemBarrier(kLoadLoad); - GenMemBarrier(kLoadStore); + if (is_object) { + LoadRefDisp(r_base, field_offset, rl_result.reg, field_info.IsVolatile() ? 
kVolatile : + kNotVolatile); } else { - LoadBaseDisp(r_base, field_offset, rl_result.reg, load_size); + LoadBaseDisp(r_base, field_offset, rl_result.reg, load_size, field_info.IsVolatile() ? + kVolatile : kNotVolatile); } FreeTemp(r_base); @@ -785,17 +794,15 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, GenNullCheck(rl_obj.reg, opt_flags); RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); int field_offset = field_info.FieldOffset().Int32Value(); - if (field_info.IsVolatile()) { - LoadBaseDispVolatile(rl_obj.reg, field_offset, rl_result.reg, load_size); - MarkPossibleNullPointerException(opt_flags); - // Without context sensitive analysis, we must issue the most conservative barriers. - // In this case, either a load or store may follow so we issue both barriers. - GenMemBarrier(kLoadLoad); - GenMemBarrier(kLoadStore); + LIR* load_lir; + if (is_object) { + load_lir = LoadRefDisp(rl_obj.reg, field_offset, rl_result.reg, field_info.IsVolatile() ? + kVolatile : kNotVolatile); } else { - LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, load_size); - MarkPossibleNullPointerException(opt_flags); + load_lir = LoadBaseDisp(rl_obj.reg, field_offset, rl_result.reg, load_size, + field_info.IsVolatile() ? kVolatile : kNotVolatile); } + MarkPossibleNullPointerExceptionAfter(opt_flags, load_lir); if (is_long_or_double) { StoreValueWide(rl_dest, rl_result); } else { @@ -847,17 +854,15 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size, } GenNullCheck(rl_obj.reg, opt_flags); int field_offset = field_info.FieldOffset().Int32Value(); - if (field_info.IsVolatile()) { - // There might have been a store before this volatile one so insert StoreStore barrier. - GenMemBarrier(kStoreStore); - StoreBaseDispVolatile(rl_obj.reg, field_offset, rl_src.reg, store_size); - MarkPossibleNullPointerException(opt_flags); - // A load might follow the volatile store so insert a StoreLoad barrier. - GenMemBarrier(kStoreLoad); + LIR* store; + if (is_object) { + store = StoreRefDisp(rl_obj.reg, field_offset, rl_src.reg, field_info.IsVolatile() ? + kVolatile : kNotVolatile); } else { - StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, store_size); - MarkPossibleNullPointerException(opt_flags); + store = StoreBaseDisp(rl_obj.reg, field_offset, rl_src.reg, store_size, + field_info.IsVolatile() ? 
kVolatile : kNotVolatile); } + MarkPossibleNullPointerExceptionAfter(opt_flags, store); if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) { MarkGCCard(rl_src.reg, rl_obj.reg); } @@ -916,9 +921,9 @@ void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { // We're don't need access checks, load type from dex cache int32_t dex_cache_offset = mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg); + LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg, kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(res_reg, offset_of_type, rl_result.reg); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || SLOW_TYPE_PATH) { // Slow path, at runtime test if type is null and if so initialize @@ -989,10 +994,10 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { LoadCurrMethodDirect(r_method); } LoadRefDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), - TargetReg(kArg0)); + TargetReg(kArg0), kNotVolatile); // Might call out to helper, which will return resolved string in kRet0 - LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); + LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0), kNotVolatile); LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL); LIR* cont = NewLIR0(kPseudoTargetLabel); @@ -1031,8 +1036,9 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg); - LoadRefDisp(res_reg, offset_of_string, rl_result.reg); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); StoreValue(rl_dest, rl_result); } } @@ -1133,14 +1139,17 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re LoadCurrMethodDirect(check_class); if (use_declaring_class) { - LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class); - LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class); + LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + kNotVolatile); + LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, + kNotVolatile); } else { LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - check_class); - LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class); + check_class, kNotVolatile); + LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, + kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(check_class, offset_of_type, check_class); + LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); } LIR* ne_branchover = NULL; @@ -1196,14 +1205,14 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } else if (use_declaring_class) { LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - 
class_reg); + class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg); + class_reg, kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); if (!can_assume_type_is_in_dex_cache) { // Need to test presence of type in dex cache at runtime LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); @@ -1231,7 +1240,8 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know /* load object->klass_ */ DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); - LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); + LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1), + kNotVolatile); /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */ LIR* branchover = NULL; if (type_known_final) { @@ -1344,13 +1354,13 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ OpRegCopy(class_reg, TargetReg(kRet0)); // Align usage with fast path } else if (use_declaring_class) { LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - class_reg); + class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg); + class_reg, kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) { // Need to test presence of type in dex cache at runtime LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); @@ -1405,7 +1415,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ if (load_) { m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), - m2l_->TargetReg(kArg1)); + m2l_->TargetReg(kArg1), kNotVolatile); } if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetReg(kArg2), @@ -1436,7 +1446,8 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); /* load object->klass_ */ DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); - LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); + LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1), + kNotVolatile); LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL); LIR* cont = NewLIR0(kPseudoTargetLabel); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 638c590f2e..008ebfbd71 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -501,7 +501,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + StoreRefDisp(TargetReg(kSp), 0, 
TargetReg(kArg0), kNotVolatile); } if (cu_->num_ins == 0) { @@ -616,7 +616,8 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, case 1: // Get method->dex_cache_resolved_methods_ cg->LoadRefDisp(cg->TargetReg(kArg0), mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - cg->TargetReg(kArg0)); + cg->TargetReg(kArg0), + kNotVolatile); // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -631,7 +632,8 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, CHECK_EQ(cu->dex_file, target_method.dex_file); cg->LoadRefDisp(cg->TargetReg(kArg0), ObjArray::OffsetOfElement(target_method.dex_method_index).Int32Value(), - cg->TargetReg(kArg0)); + cg->TargetReg(kArg0), + kNotVolatile); break; case 3: // Grab the code from the method* if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { @@ -676,17 +678,20 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags); // get this->klass_ [use kArg1, set kInvokeTgt] cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), - cg->TargetReg(kInvokeTgt)); + cg->TargetReg(kInvokeTgt), + kNotVolatile); cg->MarkPossibleNullPointerException(info->opt_flags); break; case 2: // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt] cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(), - cg->TargetReg(kInvokeTgt)); + cg->TargetReg(kInvokeTgt), + kNotVolatile); break; case 3: // Get target method [use kInvokeTgt, set kArg0] cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), ObjArray::OffsetOfElement(method_idx).Int32Value(), - cg->TargetReg(kArg0)); + cg->TargetReg(kArg0), + kNotVolatile); break; case 4: // Get the compiled code address [uses kArg0, sets kInvokeTgt] if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { @@ -731,19 +736,22 @@ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags); // Get this->klass_ [use kArg1, set kInvokeTgt] cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), - cg->TargetReg(kInvokeTgt)); + cg->TargetReg(kInvokeTgt), + kNotVolatile); cg->MarkPossibleNullPointerException(info->opt_flags); break; case 3: // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt] // NOTE: native pointer. cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(), - cg->TargetReg(kInvokeTgt)); + cg->TargetReg(kInvokeTgt), + kNotVolatile); break; case 4: // Get target method [use kInvokeTgt, set kArg0] // NOTE: native pointer. 
cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(), - cg->TargetReg(kArg0)); + cg->TargetReg(kArg0), + kNotVolatile); break; case 5: // Get the compiled code address [use kArg0, set kInvokeTgt] if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { @@ -967,7 +975,7 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_arg.wide) { - StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64); + StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64, kNotVolatile); next_use += 2; } else { Store32Disp(TargetReg(kSp), outs_offset, arg_reg); @@ -1037,7 +1045,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, loc = UpdateLocWide(loc); if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); } next_arg += 2; } else { @@ -1307,7 +1315,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { reg_off = AllocTemp(); reg_ptr = AllocTempRef(); Load32Disp(rl_obj.reg, offset_offset, reg_off); - LoadRefDisp(rl_obj.reg, value_offset, reg_ptr); + LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile); } if (rl_idx.is_const) { OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg)); @@ -1672,7 +1680,7 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); - LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64); + LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64, kNotVolatile); FreeTemp(rl_temp_offset); } } else { @@ -1719,7 +1727,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); - StoreBaseDisp(rl_temp_offset, 0, rl_value.reg, k64); + StoreBaseDisp(rl_temp_offset, 0, rl_value.reg, k64, kNotVolatile); FreeTemp(rl_temp_offset); } } else { diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 6469d9c4f1..bfb77fc222 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -66,7 +66,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { } else { // Lives in the frame, need to store. 
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32); + StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile); } if (!zero_reg.Valid()) { FreeTemp(temp_reg); @@ -93,7 +93,7 @@ void Mir2Lir::LoadValueDirect(RegLocation rl_src, RegStorage r_dest) { (rl_src.location == kLocCompilerTemp)); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_src.ref) { - LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest); + LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, kNotVolatile); } else { Load32Disp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest); } @@ -126,7 +126,7 @@ void Mir2Lir::LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest) { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64); + LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64, kNotVolatile); } } @@ -214,7 +214,11 @@ void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) { def_start = last_lir_insn_; ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); + if (rl_dest.ref) { + StoreRefDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kNotVolatile); + } else { + Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); + } MarkClean(rl_dest); def_end = last_lir_insn_; if (!rl_dest.ref) { @@ -301,7 +305,7 @@ void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) { DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile); MarkClean(rl_dest); def_end = last_lir_insn_; MarkDefWide(rl_dest, def_start, def_end); @@ -365,7 +369,7 @@ void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) { DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile); MarkClean(rl_dest); LIR *def_end = last_lir_insn_; MarkDefWide(rl_dest, def_start, def_end); diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index e53105fc84..26ea6a8ec7 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -264,9 +264,9 @@ void MipsMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); RegStorage reset_reg = AllocTempRef(); - LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg); + LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg, kNotVolatile); LoadConstant(reset_reg, 0); - StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg, kNotVolatile); FreeTemp(reset_reg); StoreValue(rl_dest, 
rl_result); } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 571adaccc1..c0ad9161f6 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -33,20 +33,16 @@ class MipsMir2Lir FINAL : public Mir2Lir { LIR* CheckSuspendUsingLoad() OVERRIDE; RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; - LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadConstantNoClobber(RegStorage r_dest, int value); LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index beaf6bb8ea..903a7709ca 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -294,7 +294,7 @@ bool MipsMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); DCHECK(size == kSignedByte); - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); StoreValue(rl_dest, rl_result); return true; } @@ -310,7 +310,7 @@ bool MipsMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); DCHECK(size == kSignedByte); RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); return true; } @@ -524,7 +524,7 @@ void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } - LoadBaseDisp(reg_ptr, 0, rl_result.reg, size); + LoadBaseDisp(reg_ptr, 0, rl_result.reg, size, kNotVolatile); FreeTemp(reg_ptr); StoreValueWide(rl_dest, rl_result); @@ -602,7 +602,7 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, FreeTemp(reg_len); } - StoreBaseDisp(reg_ptr, 0, rl_src.reg, size); + StoreBaseDisp(reg_ptr, 0, rl_src.reg, size, kNotVolatile); } else { rl_src = LoadValue(rl_src, reg_class); if (needs_range_check) { diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 01b25f9291..b49f43617f 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -546,23 +546,31 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora return load; } -LIR* MipsMir2Lir::LoadBaseDispVolatile(RegStorage 
r_base, int displacement, RegStorage r_dest, - OpSize size) { - DCHECK(size != k64 && size != kDouble); - return LoadBaseDisp(r_base, displacement, r_dest, size); -} - LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { + OpSize size, VolatileKind is_volatile) { + if (is_volatile == kVolatile) { + DCHECK(size != k64 && size != kDouble); + } + // TODO: base this on target. if (size == kWord) { size = k32; } + LIR* load; if (size == k64 || size == kDouble) { - return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size); + load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size); } else { - return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size); + load = LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size); } + + if (UNLIKELY(is_volatile == kVolatile)) { + // Without context sensitive analysis, we must issue the most conservative barriers. + // In this case, either a load or store may follow so we issue both barriers. + GenMemBarrier(kLoadLoad); + GenMemBarrier(kLoadStore); + } + + return load; } // FIXME: don't split r_dest into 2 containers. @@ -648,23 +656,31 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, return res; } -LIR* MipsMir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { - DCHECK(size != k64 && size != kDouble); - return StoreBaseDisp(r_base, displacement, r_src, size); -} - LIR* MipsMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { + OpSize size, VolatileKind is_volatile) { + if (is_volatile == kVolatile) { + DCHECK(size != k64 && size != kDouble); + // There might have been a store before this volatile one so insert StoreStore barrier. + GenMemBarrier(kStoreStore); + } + // TODO: base this on target. if (size == kWord) { size = k32; } + LIR* store; if (size == k64 || size == kDouble) { - return StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), size); + store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), size); } else { - return StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size); + store = StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size); } + + if (UNLIKELY(is_volatile == kVolatile)) { + // A load might follow the volatile store so insert a StoreLoad barrier. + GenMemBarrier(kStoreLoad); + } + + return store; } LIR* MipsMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 1fc416301c..5d68187d8b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -92,7 +92,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) if (!reg_arg.Valid()) { RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); - LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32); + LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile); return new_reg; } else { // Check if we need to copy the arg to a different reg_class. @@ -120,7 +120,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg. 
if (!reg_arg_low.Valid()) { RegStorage new_regs = AllocTypedTempWide(false, reg_class); - LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64); + LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64, kNotVolatile); return new_regs; // The reg_class is OK, we can return. } else { // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory, @@ -193,7 +193,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { if (reg.Valid()) { OpRegCopy(rl_dest.reg, reg); } else { - LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64); + LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); } return; } @@ -211,7 +211,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high); Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow()); } else { - LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64); + LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); } } } @@ -243,14 +243,11 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class) : AllocTypedTemp(rl_dest.fp, reg_class); } - if (data.is_volatile) { - LoadBaseDispVolatile(reg_obj, data.field_offset, r_result, size); - // Without context sensitive analysis, we must issue the most conservative barriers. - // In this case, either a load or store may follow so we issue both barriers. - GenMemBarrier(kLoadLoad); - GenMemBarrier(kLoadStore); + if (ref) { + LoadRefDisp(reg_obj, data.field_offset, r_result, data.is_volatile ? kVolatile : kNotVolatile); } else { - LoadBaseDisp(reg_obj, data.field_offset, r_result, size); + LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile : + kNotVolatile); } if (r_result != rl_dest.reg) { if (wide) { @@ -288,14 +285,11 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide); - if (data.is_volatile) { - // There might have been a store before this volatile one so insert StoreStore barrier. - GenMemBarrier(kStoreStore); - StoreBaseDispVolatile(reg_obj, data.field_offset, reg_src, size); - // A load might follow the volatile store so insert a StoreLoad barrier. - GenMemBarrier(kStoreLoad); + if (ref) { + StoreRefDisp(reg_obj, data.field_offset, reg_src, data.is_volatile ? kVolatile : kNotVolatile); } else { - StoreBaseDisp(reg_obj, data.field_offset, reg_src, size); + StoreBaseDisp(reg_obj, data.field_offset, reg_src, size, data.is_volatile ? 
kVolatile : + kNotVolatile); } if (ref) { MarkGCCard(reg_src, reg_obj); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index f70087d451..b07c85e2c3 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -663,6 +663,7 @@ class Mir2Lir : public Backend { virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); + void MarkSafepointPCAfter(LIR* after); void SetupResourceMasks(LIR* lir); void SetMemRefType(LIR* lir, bool is_load, int mem_type); void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); @@ -830,6 +831,7 @@ class Mir2Lir : public Backend { void GenArrayBoundsCheck(int32_t index, RegStorage length); LIR* GenNullCheck(RegStorage reg); void MarkPossibleNullPointerException(int opt_flags); + void MarkPossibleNullPointerExceptionAfter(int opt_flags, LIR* after); void MarkPossibleStackOverflowException(); void ForceImplicitNullCheck(RegStorage reg, int opt_flags); LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind); @@ -1007,15 +1009,20 @@ class Mir2Lir : public Backend { virtual LIR* LoadConstant(RegStorage r_dest, int value); // Natural word size. virtual LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) { - return LoadBaseDisp(r_base, displacement, r_dest, kWord); + return LoadBaseDisp(r_base, displacement, r_dest, kWord, kNotVolatile); } // Load 32 bits, regardless of target. virtual LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest) { - return LoadBaseDisp(r_base, displacement, r_dest, k32); + return LoadBaseDisp(r_base, displacement, r_dest, k32, kNotVolatile); } // Load a reference at base + displacement and decompress into register. - virtual LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) { - return LoadBaseDisp(r_base, displacement, r_dest, kReference); + virtual LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest, + VolatileKind is_volatile) { + return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile); + } + // Load a reference at base + index and decompress into register. + virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) { + return LoadBaseIndexed(r_base, r_index, r_dest, 2, kReference); } // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind); @@ -1033,15 +1040,20 @@ class Mir2Lir : public Backend { virtual void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest); // Store an item of natural word size. virtual LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) { - return StoreBaseDisp(r_base, displacement, r_src, kWord); + return StoreBaseDisp(r_base, displacement, r_src, kWord, kNotVolatile); } // Store an uncompressed reference into a compressed 32-bit container. - virtual LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src) { - return StoreBaseDisp(r_base, displacement, r_src, kReference); + virtual LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, + VolatileKind is_volatile) { + return StoreBaseDisp(r_base, displacement, r_src, kReference, is_volatile); + } + // Store an uncompressed reference into a compressed 32-bit container by index. 
+ virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) { + return StoreBaseIndexed(r_base, r_index, r_src, 2, kReference); } // Store 32 bits, regardless of target. virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) { - return StoreBaseDisp(r_base, displacement, r_src, k32); + return StoreBaseDisp(r_base, displacement, r_src, k32, kNotVolatile); } /** @@ -1144,20 +1156,16 @@ class Mir2Lir : public Backend { virtual RegStorage LoadHelper(ThreadOffset<4> offset) = 0; virtual RegStorage LoadHelper(ThreadOffset<8> offset) = 0; - virtual LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) = 0; virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) = 0; + OpSize size, VolatileKind is_volatile) = 0; virtual LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) = 0; virtual LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) = 0; virtual LIR* LoadConstantNoClobber(RegStorage r_dest, int value) = 0; virtual LIR* LoadConstantWide(RegStorage r_dest, int64_t value) = 0; - virtual LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) = 0; virtual LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) = 0; + OpSize size, VolatileKind is_volatile) = 0; virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) = 0; virtual LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 5bb0ee04d4..60eebe4a25 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -735,7 +735,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { } int v_reg = mir_graph_->SRegToVReg(info1->SReg()); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); + StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile); } } else { RegisterInfo* info = GetRegInfo(reg); @@ -743,7 +743,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); + StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile); } } } @@ -755,7 +755,7 @@ void Mir2Lir::FlushReg(RegStorage reg) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord); + StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord, kNotVolatile); } } diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 3f54798b7e..f06f08ee52 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -325,11 +325,21 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", 
"!0r,[!1r+!2r<<!3d+!4d]" } +#define EXT_0F_REX_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + #define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +#define EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ @@ -481,6 +491,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0), EXT_0F_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0), EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0), + EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movzx8q, 0xB6, REG_DEF0), + EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movzx16q, 0xB7, REG_DEF0), + EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movsx8q, 0xBE, REG_DEF0), + EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movsx16q, 0xBF, REG_DEF0), #undef EXT_0F_ENCODING_MAP { kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x70, 0, 0, 0, 0, 0, false }, "Jcc8", "!1c !0t" }, @@ -827,7 +841,8 @@ void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw CHECK(strchr(entry->name, '8') != nullptr) << entry->name; } else { if (entry->skeleton.immediate_bytes != 1) { // Ignore ...I8 instructions. 
- if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")) { + if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8") + && !StartsWith(entry->name, "Movzx8q") && !StartsWith(entry->name, "Movsx8q")) { CHECK(strchr(entry->name, '8') == nullptr) << entry->name; } } diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 28195aba36..425caec177 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -295,7 +295,8 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg()); int displacement = SRegOffset(base_of_code_->s_reg_low); // Native pointer - must be natural word size. - setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, Gen64Bit() ? k64 : k32); + setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, + Gen64Bit() ? k64 : k32, kNotVolatile); } FreeTemp(rs_rX86_ARG0); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index d482e58521..70382c746a 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -68,20 +68,16 @@ class X86Mir2Lir : public Mir2Lir { LIR* CheckSuspendUsingLoad() OVERRIDE; RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; - LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadConstantNoClobber(RegStorage r_dest, int value); LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; + OpSize size, VolatileKind is_volatile) OVERRIDE; LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 1f05ab9cf4..f854adb175 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -144,7 +144,7 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do } else { // It must have been register promoted if it is not a temp but is still in physical // register. Since we need it to be in memory to convert, we place it there now. 
- StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64); + StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64, kNotVolatile); } } @@ -178,7 +178,7 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do */ rl_result = EvalLoc(rl_dest, kFPReg, true); if (is_double) { - LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); + LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64, kNotVolatile); StoreFinalValueWide(rl_dest, rl_result); } else { @@ -221,7 +221,7 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, LoadConstant(rl_result.reg, 0x7fffffff); NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg()); NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg()); - LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe); LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); LIR* branch_normal = NewLIR1(kX86Jmp8, 0); @@ -242,7 +242,7 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, LoadConstant(rl_result.reg, 0x7fffffff); NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg()); NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg()); - LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe); LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); LIR* branch_normal = NewLIR1(kX86Jmp8, 0); @@ -281,7 +281,7 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg()); NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg()); - LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe); LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); LIR* branch_normal = NewLIR1(kX86Jmp8, 0); @@ -306,7 +306,7 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg()); NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg()); - LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe); LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); LIR* branch_normal = NewLIR1(kX86Jmp8, 0); @@ -363,7 +363,8 @@ void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation } else { // It must have been register promoted if it is not a temp but is still in physical // register. Since we need it to be in memory to convert, we place it there now. - StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32); + StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32, + kNotVolatile); } } @@ -373,7 +374,8 @@ void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation FlushSpecificReg(reg_info); ResetDef(rl_src2.reg); } else { - StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? 
k64 : k32); + StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32, + kNotVolatile); } } @@ -433,7 +435,7 @@ void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation if (rl_result.location == kLocPhysReg) { rl_result = EvalLoc(rl_dest, kFPReg, true); if (is_double) { - LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); + LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64, kNotVolatile); StoreFinalValueWide(rl_dest, rl_result); } else { Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index b905312726..350cfb86be 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -35,14 +35,13 @@ void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegReg(kOpXor, rl_result.reg, rl_result.reg); // result = 0 - OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe); // result = (src1 != src2) ? 1 : result RegStorage temp_reg = AllocTemp(); - OpRegReg(kOpNeg, temp_reg, rl_result.reg); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - // result = (src1 < src2) ? -result : result - OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg); + NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0 + NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 >= src2) ? 0 : 1 + NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg()); + NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + StoreValue(rl_dest, rl_result); FreeTemp(temp_reg); return; @@ -755,7 +754,7 @@ bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // Unaligned access is allowed on x86. - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); if (size == k64) { StoreValueWide(rl_dest, rl_result); } else { @@ -773,12 +772,12 @@ bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { if (size == k64) { // Unaligned access is allowed on x86. RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned access is allowed on x86. RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); } return true; } @@ -1139,7 +1138,7 @@ void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); break; case 1: - LoadBaseDisp(rs_rX86_SP, displacement, dest, k32); + LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile); break; default: m = NewLIR4(IS_SIMM8(val) ? 
kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), @@ -1295,7 +1294,8 @@ void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation if (src1_in_reg) { NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg()); } else { - LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32); + LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32, + kNotVolatile); } if (is_square) { @@ -1318,7 +1318,8 @@ void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation if (src2_in_reg) { NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg()); } else { - LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32); + LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32, + kNotVolatile); } // EAX <- EAX * 1L (2H * 1L) @@ -1351,7 +1352,8 @@ void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation if (src2_in_reg) { NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg()); } else { - LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32); + LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32, + kNotVolatile); } // EDX:EAX <- 2L * 1L (double precision) @@ -2290,21 +2292,21 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, if (rl_method.location == kLocPhysReg) { if (use_declaring_class) { LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - check_class); + check_class, kNotVolatile); } else { LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - check_class); - LoadRefDisp(check_class, offset_of_type, check_class); + check_class, kNotVolatile); + LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); } } else { LoadCurrMethodDirect(check_class); if (use_declaring_class) { LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - check_class); + check_class, kNotVolatile); } else { LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - check_class); - LoadRefDisp(check_class, offset_of_type, check_class); + check_class, kNotVolatile); + LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); } } @@ -2351,16 +2353,16 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k } else if (use_declaring_class) { LoadValueDirectFixed(rl_src, TargetReg(kArg0)); LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - class_reg); + class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2). LoadValueDirectFixed(rl_src, TargetReg(kArg0)); LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg); + class_reg, kNotVolatile); int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); - LoadRefDisp(class_reg, offset_of_type, class_reg); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); if (!can_assume_type_is_in_dex_cache) { // Need to test presence of type in dex cache at runtime. LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); @@ -2393,7 +2395,8 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k /* Load object->klass_. 
*/ DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); - LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); + LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1), + kNotVolatile); /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */ LIR* branchover = nullptr; if (type_known_final) { diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 078dd5a73b..e369d26df3 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -1866,7 +1866,7 @@ void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile); } if (cu_->num_ins == 0) { @@ -1916,11 +1916,11 @@ void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { } if (need_flush) { if (t_loc->wide && t_loc->fp) { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile); // Increment i to skip the next one i++; } else if (t_loc->wide && !t_loc->fp) { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile); // Increment i to skip the next one i++; } else { @@ -2018,14 +2018,14 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, loc = UpdateLocWide(loc); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); } next_arg += 2; } else { loc = UpdateLoc(loc); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); } next_arg++; } @@ -2161,18 +2161,18 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_arg.wide) { if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); } else { LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile); } i++; } else { if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); } else { LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32, kNotVolatile); } } } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 46e877f8f9..0352808a7c 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -585,7 +585,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // value. 
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), - kDouble); + kDouble, kNotVolatile); res->target = data_target; res->flags.fixup = kFixupLoad; store_method_addr_used_ = true; @@ -611,8 +611,12 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (val_lo < 0) { val_hi += 1; } - res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi); - NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32); + if (val_hi != 0) { + res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi); + NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32); + } else { + res = NewLIR2(kX86Xor64RR, r_dest.GetReg(), r_dest.GetReg()); + } if (val_lo != 0) { NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo); } @@ -752,17 +756,22 @@ LIR* X86Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStora return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size); } -LIR* X86Mir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { +LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size, VolatileKind is_volatile) { // LoadBaseDisp() will emit correct insn for atomic load on x86 // assuming r_dest is correctly prepared using RegClassForFieldLoadStore(). - return LoadBaseDisp(r_base, displacement, r_dest, size); -} -LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) { - return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest, - size); + LIR* load = LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest, + size); + + if (UNLIKELY(is_volatile == kVolatile)) { + // Without context sensitive analysis, we must issue the most conservative barriers. + // In this case, either a load or store may follow so we issue both barriers. + GenMemBarrier(kLoadLoad); + GenMemBarrier(kLoadStore); + } + + return load; } LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, @@ -850,20 +859,28 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int /* store value base base + scaled index. */ LIR* X86Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, - int scale, OpSize size) { + int scale, OpSize size) { return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size); } -LIR* X86Mir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, - RegStorage r_src, OpSize size) { +LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size, + VolatileKind is_volatile) { + if (UNLIKELY(is_volatile == kVolatile)) { + // There might have been a store before this volatile one so insert StoreStore barrier. + GenMemBarrier(kStoreStore); + } + // StoreBaseDisp() will emit correct insn for atomic store on x86 // assuming r_dest is correctly prepared using RegClassForFieldLoadStore(). - return StoreBaseDisp(r_base, displacement, r_src, size); -} -LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, - RegStorage r_src, OpSize size) { - return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size); + LIR* store = StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size); + + if (UNLIKELY(is_volatile == kVolatile)) { + // A load might follow the volatile store so insert a StoreLoad barrier. 
+ GenMemBarrier(kStoreLoad); + } + + return store; } LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 28b9dca193..17c44bc2c7 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -609,6 +609,10 @@ enum X86OpCode { Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value Binary0fOpCode(kX86Movsx16), // sign-extend 16-bit value + Binary0fOpCode(kX86Movzx8q), // zero-extend 8-bit value to quad word + Binary0fOpCode(kX86Movzx16q), // zero-extend 16-bit value to quad word + Binary0fOpCode(kX86Movsx8q), // sign-extend 8-bit value to quad word + Binary0fOpCode(kX86Movsx16q), // sign-extend 16-bit value to quad word #undef Binary0fOpCode kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned @@ -707,6 +711,8 @@ struct X86EncodingMap { #define REX_X 0x42 // Extension of the ModR/M r/m field, SIB base field, or Opcode reg field #define REX_B 0x41 +// Extended register set +#define REX 0x40 // Mask extracting the least 3 bits of r0..r15 #define kRegNumMask32 0x07 // Value indicating that base or reg is not used diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 3e326f0633..96625c5dac 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -697,7 +697,7 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) return; } - timings->NewSplit("LoadImageClasses"); + TimingLogger::ScopedTiming t("LoadImageClasses", timings); // Make a first class to load all classes explicitly listed in the file Thread* self = Thread::Current(); ScopedObjectAccess soa(self); @@ -794,8 +794,7 @@ void CompilerDriver::FindClinitImageClassesCallback(mirror::Object* object, void void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { if (IsImage()) { - timings->NewSplit("UpdateImageClasses"); - + TimingLogger::ScopedTiming t("UpdateImageClasses", timings); // Update image_classes_ with classes for objects created by <clinit> methods. Thread* self = Thread::Current(); const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter"); @@ -1606,11 +1605,11 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_fil if (IsImage()) { // For images we resolve all types, such as array, whereas for applications just those with // classdefs are resolved by ResolveClassFieldsAndMethods. 
- timings->NewSplit("Resolve Types"); + TimingLogger::ScopedTiming t("Resolve Types", timings); context.ForAll(0, dex_file.NumTypeIds(), ResolveType, thread_count_); } - timings->NewSplit("Resolve MethodsAndFields"); + TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings); context.ForAll(0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_); } @@ -1672,7 +1671,7 @@ static void VerifyClass(const ParallelCompilationManager* manager, size_t class_ void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file, ThreadPool* thread_pool, TimingLogger* timings) { - timings->NewSplit("Verify Dex File"); + TimingLogger::ScopedTiming t("Verify Dex File", timings); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool); context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_); @@ -1765,7 +1764,7 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file, ThreadPool* thread_pool, TimingLogger* timings) { - timings->NewSplit("InitializeNoClinit"); + TimingLogger::ScopedTiming t("InitializeNoClinit", timings); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, thread_pool); size_t thread_count; @@ -1877,7 +1876,7 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file, ThreadPool* thread_pool, TimingLogger* timings) { - timings->NewSplit("Compile Dex File"); + TimingLogger::ScopedTiming t("Compile Dex File", timings); ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, &dex_file, thread_pool); context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_); @@ -2054,7 +2053,9 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) { ProfileFile::ProfileData data; if (!profile_file_.GetProfileData(&data, method_name)) { // Not in profile, no information can be determined. - VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile"; + if (kIsDebugBuild) { + VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile"; + } return true; } @@ -2063,13 +2064,16 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) { // falls inside a bucket. 
bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent() <= compiler_options_->GetTopKProfileThreshold(); - if (compile) { - LOG(INFO) << "compiling method " << method_name << " because its usage is part of top " - << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%" - << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")"; - } else { - VLOG(compiler) << "not compiling method " << method_name << " because it's not part of leading " - << compiler_options_->GetTopKProfileThreshold() << "% samples)"; + if (kIsDebugBuild) { + if (compile) { + LOG(INFO) << "compiling method " << method_name << " because its usage is part of top " + << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%" + << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")"; + } else { + VLOG(compiler) << "not compiling method " << method_name + << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold() + << "% samples)"; + } } return !compile; } diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index ca956aac36..5325a68b37 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -38,12 +38,12 @@ class CompilerDriverTest : public CommonCompilerTest { protected: void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) { TimingLogger timings("CompilerDriverTest::CompileAll", false, false); - timings.StartSplit("CompileAll"); + TimingLogger::ScopedTiming t(__FUNCTION__, &timings); compiler_driver_->CompileAll(class_loader, Runtime::Current()->GetCompileTimeClassPath(class_loader), &timings); + t.NewTiming("MakeAllExecutable"); MakeAllExecutable(class_loader); - timings.EndSplit(); } void EnsureCompiled(jobject class_loader, const char* class_name, const char* method, diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 92be147a40..e8bbaef1e9 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -64,7 +64,7 @@ TEST_F(ImageTest, WriteRead) { jobject class_loader = NULL; ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); TimingLogger timings("ImageTest::WriteRead", false, false); - timings.StartSplit("CompileAll"); + TimingLogger::ScopedTiming t("CompileAll", &timings); if (kUsePortableCompiler) { // TODO: we disable this for portable so the test executes in a reasonable amount of time. // We shouldn't need to do this. @@ -75,6 +75,7 @@ TEST_F(ImageTest, WriteRead) { } compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings); + t.NewTiming("WriteElf"); ScopedObjectAccess soa(Thread::Current()); OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, "", compiler_driver_.get(), &timings); @@ -84,7 +85,6 @@ TEST_F(ImageTest, WriteRead) { &oat_writer, oat_file.GetFile()); ASSERT_TRUE(success); - timings.EndSplit(); } } // Workound bug that mcld::Linker::emit closes oat_file by reopening as dup_oat. 
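The hunks above replace manual StartSplit/EndSplit/NewSplit bookkeeping with the RAII-style TimingLogger::ScopedTiming helper, which opens a named timing when it is constructed and closes it when it goes out of scope, so a split cannot be left open on an early return. Below is a minimal standalone C++ sketch of that pattern, assuming the start/end event model introduced later in runtime/base/timing_logger.cc (a start event carries a label, an end event carries none); SketchTimingLogger and its members are illustrative stand-ins, not ART's actual TimingLogger API.

#include <chrono>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Illustrative stand-in for a scoped timing logger; not the ART TimingLogger.
class SketchTimingLogger {
 public:
  void StartTiming(const char* label) {  // start event: non-null label
    events_.emplace_back(NowNs(), label);
  }
  void EndTiming() {                     // end event: null label
    events_.emplace_back(NowNs(), nullptr);
  }
  void Dump(std::ostream& os) const {
    std::vector<size_t> open;            // indices of starts awaiting their end
    for (size_t i = 0; i < events_.size(); ++i) {
      if (events_[i].second != nullptr) {
        open.push_back(i);
      } else if (!open.empty()) {
        size_t start = open.back();
        open.pop_back();
        os << events_[start].second << ": "
           << (events_[i].first - events_[start].first) << " ns\n";
      }
    }
  }

  // RAII helper: starts a timing on construction and ends it on destruction.
  class ScopedTiming {
   public:
    ScopedTiming(const char* label, SketchTimingLogger* logger) : logger_(logger) {
      logger_->StartTiming(label);
    }
    void NewTiming(const char* label) {  // close the current split, open the next
      logger_->EndTiming();
      logger_->StartTiming(label);
    }
    ~ScopedTiming() { logger_->EndTiming(); }
   private:
    SketchTimingLogger* const logger_;
  };

 private:
  static uint64_t NowNs() {
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now().time_since_epoch()).count();
  }
  std::vector<std::pair<uint64_t, const char*>> events_;
};

int main() {
  SketchTimingLogger timings;
  {
    SketchTimingLogger::ScopedTiming t("CompileAll", &timings);
    // ... work for the first split ...
    t.NewTiming("MakeAllExecutable");
    // ... work for the second split ...
  }
  timings.Dump(std::cout);
  return 0;
}

Under this shape, the explicit EndSplit calls removed in compiler_driver_test.cc and image_test.cc are no longer needed: the destructor closes the last split on any exit path, and t.NewTiming("...") switches from one split to the next in a single call.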
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index c6b9161b63..e1b6992c47 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -91,31 +91,31 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_oat_class_method_offsets_(0) { size_t offset; { - TimingLogger::ScopedSplit split("InitOatHeader", timings); + TimingLogger::ScopedTiming split("InitOatHeader", timings); offset = InitOatHeader(); } { - TimingLogger::ScopedSplit split("InitOatDexFiles", timings); + TimingLogger::ScopedTiming split("InitOatDexFiles", timings); offset = InitOatDexFiles(offset); } { - TimingLogger::ScopedSplit split("InitDexFiles", timings); + TimingLogger::ScopedTiming split("InitDexFiles", timings); offset = InitDexFiles(offset); } { - TimingLogger::ScopedSplit split("InitOatClasses", timings); + TimingLogger::ScopedTiming split("InitOatClasses", timings); offset = InitOatClasses(offset); } { - TimingLogger::ScopedSplit split("InitOatMaps", timings); + TimingLogger::ScopedTiming split("InitOatMaps", timings); offset = InitOatMaps(offset); } { - TimingLogger::ScopedSplit split("InitOatCode", timings); + TimingLogger::ScopedTiming split("InitOatCode", timings); offset = InitOatCode(offset); } { - TimingLogger::ScopedSplit split("InitOatCodeDexFiles", timings); + TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings); offset = InitOatCodeDexFiles(offset); } size_ = offset; @@ -800,6 +800,7 @@ size_t OatWriter::InitOatMaps(size_t offset) { size_t OatWriter::InitOatCode(size_t offset) { // calculate the offsets within OatHeader to executable code size_t old_offset = offset; + size_t adjusted_offset = offset; // required to be on a new page boundary offset = RoundUp(offset, kPageSize); oat_header_->SetExecutableOffset(offset); @@ -809,7 +810,8 @@ size_t OatWriter::InitOatCode(size_t offset) { #define DO_TRAMPOLINE(field, fn_name) \ offset = CompiledCode::AlignCode(offset, instruction_set); \ - oat_header_->Set ## fn_name ## Offset(offset); \ + adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \ + oat_header_->Set ## fn_name ## Offset(adjusted_offset); \ field.reset(compiler_driver_->Create ## fn_name()); \ offset += field->size(); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 521992ad3a..c3a322caee 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -93,15 +93,30 @@ static bool CanHandleCodeItem(const DexFile::CodeItem& code_item) { } template<typename T> -void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not) { +void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset) { HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - current_block_->AddInstruction(new (arena_) T(first, second)); - if (is_not) { - current_block_->AddInstruction(new (arena_) HNot(current_block_->GetLastInstruction())); - } - current_block_->AddInstruction(new (arena_) HIf(current_block_->GetLastInstruction())); - HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset); + T* comparison = new (arena_) T(first, second); + current_block_->AddInstruction(comparison); + HInstruction* ifinst = new (arena_) HIf(comparison); + current_block_->AddInstruction(ifinst); + HBasicBlock* target = FindBlockStartingAt(dex_offset + instruction.GetTargetOffset()); + DCHECK(target != nullptr); + current_block_->AddSuccessor(target); + 
target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits()); + DCHECK(target != nullptr); + current_block_->AddSuccessor(target); + current_block_ = nullptr; +} + +template<typename T> +void HGraphBuilder::If_21t(const Instruction& instruction, int32_t dex_offset) { + HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + T* comparison = new (arena_) T(value, GetIntConstant(0)); + current_block_->AddInstruction(comparison); + HInstruction* ifinst = new (arena_) HIf(comparison); + current_block_->AddInstruction(ifinst); + HBasicBlock* target = FindBlockStartingAt(dex_offset + instruction.GetTargetOffset()); DCHECK(target != nullptr); current_block_->AddSuccessor(target); target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits()); @@ -340,16 +355,38 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } + case Instruction::CONST: { + int32_t register_index = instruction.VRegA(); + HIntConstant* constant = GetIntConstant(instruction.VRegB_31i()); + UpdateLocal(register_index, constant); + break; + } + + case Instruction::CONST_HIGH16: { + int32_t register_index = instruction.VRegA(); + HIntConstant* constant = GetIntConstant(instruction.VRegB_21h() << 16); + UpdateLocal(register_index, constant); + break; + } + case Instruction::CONST_WIDE_16: { int32_t register_index = instruction.VRegA(); - HLongConstant* constant = GetLongConstant(instruction.VRegB_21s()); + // Get 16 bits of constant value, sign extended to 64 bits. + int64_t value = instruction.VRegB_21s(); + value <<= 48; + value >>= 48; + HLongConstant* constant = GetLongConstant(value); UpdateLocal(register_index, constant); break; } case Instruction::CONST_WIDE_32: { int32_t register_index = instruction.VRegA(); - HLongConstant* constant = GetLongConstant(instruction.VRegB_31i()); + // Get 32 bits of constant value, sign extended to 64 bits. + int64_t value = instruction.VRegB_31i(); + value <<= 32; + value >>= 32; + HLongConstant* constant = GetLongConstant(value); UpdateLocal(register_index, constant); break; } @@ -361,27 +398,58 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } - case Instruction::MOVE: { + case Instruction::CONST_WIDE_HIGH16: { + int32_t register_index = instruction.VRegA(); + int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48; + HLongConstant* constant = GetLongConstant(value); + UpdateLocal(register_index, constant); + break; + } + + // TODO: these instructions are also used to move floating point values, so what is + // the type (int or float)? + case Instruction::MOVE: + case Instruction::MOVE_FROM16: + case Instruction::MOVE_16: { HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::RETURN_VOID: { - BuildReturn(instruction, Primitive::kPrimVoid); + // TODO: these instructions are also used to move floating point values, so what is + // the type (long or double)? 
+ case Instruction::MOVE_WIDE: + case Instruction::MOVE_WIDE_FROM16: + case Instruction::MOVE_WIDE_16: { + HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong); + UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::IF_EQ: { - If_22t<HEqual>(instruction, dex_offset, false); + case Instruction::MOVE_OBJECT: + case Instruction::MOVE_OBJECT_16: + case Instruction::MOVE_OBJECT_FROM16: { + HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot); + UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::IF_NE: { - If_22t<HEqual>(instruction, dex_offset, true); + case Instruction::RETURN_VOID: { + BuildReturn(instruction, Primitive::kPrimVoid); break; } +#define IF_XX(comparison, cond) \ + case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_offset); break; \ + case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_offset); break + + IF_XX(HEqual, EQ); + IF_XX(HNotEqual, NE); + IF_XX(HLessThan, LT); + IF_XX(HLessThanOrEqual, LE); + IF_XX(HGreaterThan, GT); + IF_XX(HGreaterThanOrEqual, GE); + case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: { @@ -500,10 +568,10 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::MOVE_RESULT: - case Instruction::MOVE_RESULT_WIDE: { + case Instruction::MOVE_RESULT_WIDE: + case Instruction::MOVE_RESULT_OBJECT: UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); break; - } case Instruction::NOP: break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 108514a632..0852a26c55 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -22,17 +22,11 @@ #include "primitive.h" #include "utils/allocation.h" #include "utils/growable_array.h" +#include "nodes.h" namespace art { -class ArenaAllocator; class Instruction; -class HBasicBlock; -class HGraph; -class HIntConstant; -class HLongConstant; -class HInstruction; -class HLocal; class HGraphBuilder : public ValueObject { public: @@ -90,7 +84,11 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse); - template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not); + template<typename T> + void If_22t(const Instruction& instruction, int32_t dex_offset); + + template<typename T> + void If_21t(const Instruction& instruction, int32_t dex_offset); void BuildReturn(const Instruction& instruction, Primitive::Type type); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 212a6dc370..c5862dad92 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -34,6 +34,35 @@ arm::ArmManagedRegister Location::AsArm() const { namespace arm { + +inline Condition ARMCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return EQ; + case kCondNE: return NE; + case kCondLT: return LT; + case kCondLE: return LE; + case kCondGT: return GT; + case kCondGE: return GE; + default: + LOG(FATAL) << "Unknown if condition"; + } + return EQ; // Unreachable. +} + +inline Condition ARMOppositeCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return NE; + case kCondNE: return EQ; + case kCondLT: return GE; + case kCondLE: return GT; + case kCondGT: return LE; + case kCondGE: return LT; + default: + LOG(FATAL) << "Unknown if condition"; + } + return EQ; // Unreachable. 
+} + static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; @@ -419,33 +448,103 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { void LocationsBuilderARM::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); if_instr->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - // TODO: Generate the input as a condition, instead of materializing in a register. - __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()), EQ); - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Condition has been materialized, compare the output to 0 + if (!if_instr->GetLocations()->InAt(0).IsRegister()) { + LOG(FATAL) << "Materialized condition is not in an ARM register"; + } + __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(0)); + __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), EQ); + } else { + // Condition has not been materialized, use its inputs as the comparison and its + // condition as the branch condition. + __ cmp(condition->GetLocations()->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(condition->GetLocations()->InAt(1).AsArm().AsCoreRegister())); + __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), + ARMCondition(condition->GetCondition())); + } + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); } } -void LocationsBuilderARM::VisitEqual(HEqual* equal) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); + +void LocationsBuilderARM::VisitCondition(HCondition* comp) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); - equal->SetLocations(locations); + comp->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) { + if (comp->NeedsMaterialization()) { + LocationSummary* locations = comp->GetLocations(); + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + __ it(ARMCondition(comp->GetCondition()), kItElse); + __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), + ARMCondition(comp->GetCondition())); + __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), + ARMOppositeCondition(comp->GetCondition())); + } +} + +void LocationsBuilderARM::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void 
InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } -void InstructionCodeGeneratorARM::VisitEqual(HEqual* equal) { - LocationSummary* locations = equal->GetLocations(); - __ teq(locations->InAt(0).AsArm().AsCoreRegister(), - ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); - __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), EQ); - __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), NE); +void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } void LocationsBuilderARM::VisitLocal(HLocal* local) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 712a24cf67..0e2a079cde 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -20,7 +20,7 @@ #include "code_generator.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "utils/arm/assembler_arm32.h" +#include "utils/arm/assembler_thumb2.h" namespace art { namespace arm { @@ -180,7 +180,7 @@ class CodeGeneratorARM : public CodeGenerator { LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; - Arm32Assembler assembler_; + Thumb2Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f4b12e2d38..a8ee6c061e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -34,6 +34,20 @@ x86::X86ManagedRegister Location::AsX86() const { namespace x86 { +inline Condition X86Condition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kLess; + case kCondLE: return kLessEqual; + case kCondGT: return kGreater; + case kCondGE: return kGreaterEqual; + default: + LOG(FATAL) << "Unknown if condition"; + } + return kEqual; +} + static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; @@ -421,16 +435,32 @@ void LocationsBuilderX86::VisitIf(HIf* if_instr) { } void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - // TODO: Generate the input as a condition, instead of materializing in a register. 
- Location location = if_instr->GetLocations()->InAt(0); - if (location.IsRegister()) { - __ cmpl(location.AsX86().AsCpuRegister(), Immediate(0)); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Materialized condition, compare against 0 + Location lhs = if_instr->GetLocations()->InAt(0); + if (lhs.IsRegister()) { + __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0)); + } else { + __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); + } + __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); } else { - __ cmpl(Address(ESP, location.GetStackIndex()), Immediate(0)); + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). + if (rhs.IsRegister()) { + __ cmpl(lhs.AsX86().AsCpuRegister(), rhs.AsX86().AsCpuRegister()); + } else { + __ cmpl(lhs.AsX86().AsCpuRegister(), Address(ESP, rhs.GetStackIndex())); + } + __ j(X86Condition(condition->GetCondition()), + codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); } - __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); } } @@ -475,24 +505,74 @@ void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) { } -void LocationsBuilderX86::VisitEqual(HEqual* equal) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); +void LocationsBuilderX86::VisitCondition(HCondition* comp) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); - equal->SetLocations(locations); + comp->SetLocations(locations); } -void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) { - LocationSummary* locations = equal->GetLocations(); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), - locations->InAt(1).AsX86().AsCpuRegister()); - } else { - __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), - Address(ESP, locations->InAt(1).GetStackIndex())); +void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { + if (comp->NeedsMaterialization()) { + LocationSummary* locations = comp->GetLocations(); + if (locations->InAt(1).IsRegister()) { + __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), + locations->InAt(1).AsX86().AsCpuRegister()); + } else { + __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), + Address(ESP, locations->InAt(1).GetStackIndex())); + } + __ setb(X86Condition(comp->GetCondition()), locations->Out().AsX86().AsCpuRegister()); } - __ setb(kEqual, locations->Out().AsX86().AsCpuRegister()); +} + +void LocationsBuilderX86::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void 
LocationsBuilderX86::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ebeef9dfc1..283f1f5e57 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -35,6 +35,20 @@ x86_64::X86_64ManagedRegister Location::AsX86_64() const { namespace x86_64 { +inline Condition X86_64Condition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kLess; + case kCondLE: return kLessEqual; + case kCondGT: return kGreater; + case kCondGE: return kGreaterEqual; + default: + LOG(FATAL) << "Unknown if condition"; + } + return kEqual; +} + // Some x86_64 instructions require a register to be available as temp. static constexpr Register TMP = R11; @@ -295,16 +309,32 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, Location::Any()); if_instr->SetLocations(locations); } void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - // TODO: Generate the input as a condition, instead of materializing in a register. - __ cmpl(if_instr->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), Immediate(0)); - __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Materialized condition, compare against 0. 
+ Location lhs = if_instr->GetLocations()->InAt(0); + if (lhs.IsRegister()) { + __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0)); + } else { + __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); + } + __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + } else { + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister()); + __ j(X86_64Condition(condition->GetCondition()), + codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + } + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); } } @@ -349,18 +379,69 @@ void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) { } -void LocationsBuilderX86_64::VisitEqual(HEqual* equal) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); +void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); - equal->SetLocations(locations); + comp->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { + if (comp->NeedsMaterialization()) { + __ cmpq(comp->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), + comp->GetLocations()->InAt(1).AsX86_64().AsCpuRegister()); + __ setcc(X86_64Condition(comp->GetCondition()), + comp->GetLocations()->Out().AsX86_64().AsCpuRegister()); + } +} + +void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } -void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* equal) { - __ cmpq(equal->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), - equal->GetLocations()->InAt(1).AsX86_64().AsCpuRegister()); - __ setcc(kEqual, equal->GetLocations()->Out().AsX86_64().AsCpuRegister()); +void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index c3baf1a7b7..fd534ced1f 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc 
@@ -51,7 +51,12 @@ class InternalCodeAllocator : public CodeAllocator { static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) { typedef int32_t (*fptr)(); CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())(); + fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); +#if defined(__arm__) + // For thumb we need the bottom bit set. + f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); +#endif + int32_t result = f(); if (has_result) { CHECK_EQ(result, expected); } diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 371478c9e7..c59f8366fa 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -30,7 +30,9 @@ static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) { graph->AddBlock(if_block); HInstruction* instr = new (allocator) HIntConstant(4); if_block->AddInstruction(instr); - instr = new (allocator) HIf(instr); + HInstruction* equal = new (allocator) HEqual(instr, instr); + if_block->AddInstruction(equal); + instr = new (allocator) HIf(equal); if_block->AddInstruction(instr); return if_block; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 2a97fadbaf..490d345826 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -445,4 +445,23 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } + +bool HCondition::NeedsMaterialization() const { + if (!HasOnlyOneUse()) { + return true; + } + HUseListNode<HInstruction>* uses = GetUses(); + HInstruction* user = uses->GetUser(); + if (!user->IsIf()) { + return true; + } + + // TODO: should we allow intervening instructions with no side-effect between this condition + // and the If instruction? 
+ if (GetNext() != user) { + return true; + } + return false; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 143d5c9e6f..503f31d990 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -38,6 +38,15 @@ static const int kDefaultNumberOfSuccessors = 2; static const int kDefaultNumberOfPredecessors = 2; static const int kDefaultNumberOfBackEdges = 1; +enum IfCondition { + kCondEQ, + kCondNE, + kCondLT, + kCondLE, + kCondGT, + kCondGE, +}; + class HInstructionList { public: HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {} @@ -66,7 +75,7 @@ class HGraph : public ArenaObject { maximum_number_of_out_vregs_(0), number_of_vregs_(0), number_of_in_vregs_(0), - current_instruction_id_(0) { } + current_instruction_id_(0) {} ArenaAllocator* GetArena() const { return arena_; } const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -381,7 +390,13 @@ class HBasicBlock : public ArenaObject { #define FOR_EACH_INSTRUCTION(M) \ M(Add) \ + M(Condition) \ M(Equal) \ + M(NotEqual) \ + M(LessThan) \ + M(LessThanOrEqual) \ + M(GreaterThan) \ + M(GreaterThanOrEqual) \ M(Exit) \ M(Goto) \ M(If) \ @@ -400,6 +415,7 @@ class HBasicBlock : public ArenaObject { M(StoreLocal) \ M(Sub) \ + #define FORWARD_DECLARATION(type) class H##type; FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) #undef FORWARD_DECLARATION @@ -413,7 +429,7 @@ template <typename T> class HUseListNode : public ArenaObject { public: HUseListNode(T* user, size_t index, HUseListNode* tail) - : user_(user), index_(index), tail_(tail) { } + : user_(user), index_(index), tail_(tail) {} HUseListNode* GetTail() const { return tail_; } T* GetUser() const { return user_; } @@ -444,7 +460,7 @@ class HInstruction : public ArenaObject { live_interval_(nullptr), lifetime_position_(kNoLifetime) {} - virtual ~HInstruction() { } + virtual ~HInstruction() {} HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } @@ -507,6 +523,10 @@ class HInstruction : public ArenaObject { void ReplaceWith(HInstruction* instruction); + bool HasOnlyOneUse() const { + return uses_ != nullptr && uses_->GetTail() == nullptr; + } + #define INSTRUCTION_TYPE_CHECK(type) \ bool Is##type() { return (As##type() != nullptr); } \ virtual H##type* As##type() { return nullptr; } @@ -616,7 +636,7 @@ class HEnvironment : public ArenaObject { class HInputIterator : public ValueObject { public: - explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) { } + explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {} bool Done() const { return index_ == instruction_->InputCount(); } HInstruction* Current() const { return instruction_->InputAt(index_); } @@ -676,7 +696,7 @@ class HBackwardInstructionIterator : public ValueObject { template<typename T, intptr_t N> class EmbeddedArray { public: - EmbeddedArray() : elements_() { } + EmbeddedArray() : elements_() {} intptr_t GetLength() const { return N; } @@ -721,8 +741,8 @@ class EmbeddedArray<T, 0> { template<intptr_t N> class HTemplateInstruction: public HInstruction { public: - HTemplateInstruction<N>() : inputs_() { } - virtual ~HTemplateInstruction() { } + HTemplateInstruction<N>() : inputs_() {} + virtual ~HTemplateInstruction() {} virtual size_t InputCount() const { return N; } virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; } @@ -738,6 +758,18 @@ class HTemplateInstruction: public HInstruction { 
friend class SsaBuilder; }; +template<intptr_t N> +class HExpression: public HTemplateInstruction<N> { + public: + explicit HExpression<N>(Primitive::Type type) : type_(type) {} + virtual ~HExpression() {} + + virtual Primitive::Type GetType() const { return type_; } + + private: + const Primitive::Type type_; +}; + // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. class HReturnVoid : public HTemplateInstruction<0> { @@ -800,6 +832,7 @@ class HGoto : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HGoto); }; + // Conditional branch. A block ending with an HIf instruction must have // two successors. class HIf : public HTemplateInstruction<1> { @@ -820,53 +853,143 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); + virtual bool IsIfInstruction() const { return true; } + private: DISALLOW_COPY_AND_ASSIGN(HIf); }; -class HBinaryOperation : public HTemplateInstruction<2> { +class HBinaryOperation : public HExpression<2> { public: HBinaryOperation(Primitive::Type result_type, HInstruction* left, - HInstruction* right) : result_type_(result_type) { + HInstruction* right) : HExpression(result_type) { SetRawInputAt(0, left); SetRawInputAt(1, right); } HInstruction* GetLeft() const { return InputAt(0); } HInstruction* GetRight() const { return InputAt(1); } - Primitive::Type GetResultType() const { return result_type_; } + Primitive::Type GetResultType() const { return GetType(); } virtual bool IsCommutative() { return false; } - virtual Primitive::Type GetType() const { return GetResultType(); } private: - const Primitive::Type result_type_; - DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); }; - -// Instruction to check if two inputs are equal to each other. -class HEqual : public HBinaryOperation { +class HCondition : public HBinaryOperation { public: - HEqual(HInstruction* first, HInstruction* second) + HCondition(HInstruction* first, HInstruction* second) : HBinaryOperation(Primitive::kPrimBoolean, first, second) {} virtual bool IsCommutative() { return true; } + bool NeedsMaterialization() const; - virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; } + DECLARE_INSTRUCTION(Condition); + + virtual IfCondition GetCondition() const = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(HCondition); +}; + +// Instruction to check if two inputs are equal to each other. 
+class HEqual : public HCondition { + public: + HEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} DECLARE_INSTRUCTION(Equal); + virtual IfCondition GetCondition() const { + return kCondEQ; + } + private: DISALLOW_COPY_AND_ASSIGN(HEqual); }; +class HNotEqual : public HCondition { + public: + HNotEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + DECLARE_INSTRUCTION(NotEqual); + + virtual IfCondition GetCondition() const { + return kCondNE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HNotEqual); +}; + +class HLessThan : public HCondition { + public: + HLessThan(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + DECLARE_INSTRUCTION(LessThan); + + virtual IfCondition GetCondition() const { + return kCondLT; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HLessThan); +}; + +class HLessThanOrEqual : public HCondition { + public: + HLessThanOrEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + DECLARE_INSTRUCTION(LessThanOrEqual); + + virtual IfCondition GetCondition() const { + return kCondLE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual); +}; + +class HGreaterThan : public HCondition { + public: + HGreaterThan(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + DECLARE_INSTRUCTION(GreaterThan); + + virtual IfCondition GetCondition() const { + return kCondGT; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HGreaterThan); +}; + +class HGreaterThanOrEqual : public HCondition { + public: + HGreaterThanOrEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + DECLARE_INSTRUCTION(GreaterThanOrEqual); + + virtual IfCondition GetCondition() const { + return kCondGE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual); +}; + + // A local in the graph. Corresponds to a Dex register. class HLocal : public HTemplateInstruction<0> { public: - explicit HLocal(uint16_t reg_number) : reg_number_(reg_number) { } + explicit HLocal(uint16_t reg_number) : reg_number_(reg_number) {} DECLARE_INSTRUCTION(Local); @@ -880,21 +1003,17 @@ class HLocal : public HTemplateInstruction<0> { }; // Load a given local. The local is an input of this instruction. -class HLoadLocal : public HTemplateInstruction<1> { +class HLoadLocal : public HExpression<1> { public: - explicit HLoadLocal(HLocal* local, Primitive::Type type) : type_(type) { + explicit HLoadLocal(HLocal* local, Primitive::Type type) : HExpression(type) { SetRawInputAt(0, local); } - virtual Primitive::Type GetType() const { return type_; } - HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); } DECLARE_INSTRUCTION(LoadLocal); private: - const Primitive::Type type_; - DISALLOW_COPY_AND_ASSIGN(HLoadLocal); }; @@ -917,12 +1036,11 @@ class HStoreLocal : public HTemplateInstruction<2> { // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). 
-class HIntConstant : public HTemplateInstruction<0> { +class HIntConstant : public HExpression<0> { public: - explicit HIntConstant(int32_t value) : value_(value) { } + explicit HIntConstant(int32_t value) : HExpression(Primitive::kPrimInt), value_(value) {} int32_t GetValue() const { return value_; } - virtual Primitive::Type GetType() const { return Primitive::kPrimInt; } DECLARE_INSTRUCTION(IntConstant); @@ -932,9 +1050,9 @@ class HIntConstant : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; -class HLongConstant : public HTemplateInstruction<0> { +class HLongConstant : public HExpression<0> { public: - explicit HLongConstant(int64_t value) : value_(value) { } + explicit HLongConstant(int64_t value) : HExpression(Primitive::kPrimLong), value_(value) {} int64_t GetValue() const { return value_; } @@ -1008,15 +1126,14 @@ class HInvokeStatic : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStatic); }; -class HNewInstance : public HTemplateInstruction<0> { +class HNewInstance : public HExpression<0> { public: - HNewInstance(uint32_t dex_pc, uint16_t type_index) : dex_pc_(dex_pc), type_index_(type_index) {} + HNewInstance(uint32_t dex_pc, uint16_t type_index) : HExpression(Primitive::kPrimNot), + dex_pc_(dex_pc), type_index_(type_index) {} uint32_t GetDexPc() const { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } - virtual Primitive::Type GetType() const { return Primitive::kPrimNot; } - // Calls runtime so needs an environment. virtual bool NeedsEnvironment() const { return true; } @@ -1057,15 +1174,13 @@ class HSub : public HBinaryOperation { // The value of a parameter in this method. Its location depends on // the calling convention. -class HParameterValue : public HTemplateInstruction<0> { +class HParameterValue : public HExpression<0> { public: HParameterValue(uint8_t index, Primitive::Type parameter_type) - : index_(index), parameter_type_(parameter_type) {} + : HExpression(parameter_type), index_(index) {} uint8_t GetIndex() const { return index_; } - virtual Primitive::Type GetType() const { return parameter_type_; } - DECLARE_INSTRUCTION(ParameterValue); private: @@ -1073,19 +1188,15 @@ class HParameterValue : public HTemplateInstruction<0> { // than HGraph::number_of_in_vregs_; const uint8_t index_; - const Primitive::Type parameter_type_; - DISALLOW_COPY_AND_ASSIGN(HParameterValue); }; -class HNot : public HTemplateInstruction<1> { +class HNot : public HExpression<1> { public: - explicit HNot(HInstruction* input) { + explicit HNot(HInstruction* input) : HExpression(Primitive::kPrimBoolean) { SetRawInputAt(0, input); } - virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; } - DECLARE_INSTRUCTION(Not); private: @@ -1210,10 +1321,10 @@ class HParallelMove : public HTemplateInstruction<0> { class HGraphVisitor : public ValueObject { public: - explicit HGraphVisitor(HGraph* graph) : graph_(graph) { } - virtual ~HGraphVisitor() { } + explicit HGraphVisitor(HGraph* graph) : graph_(graph) {} + virtual ~HGraphVisitor() {} - virtual void VisitInstruction(HInstruction* instruction) { } + virtual void VisitInstruction(HInstruction* instruction) {} virtual void VisitBasicBlock(HBasicBlock* block); void VisitInsertionOrder(); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index ccacbef401..56029aa30b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -101,10 +101,6 @@ CompiledMethod* 
OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet(); - // The optimizing compiler currently does not have a Thumb2 assembler. - if (instruction_set == kThumb2) { - instruction_set = kArm; - } CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); if (codegen == nullptr) { if (shouldCompile) { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 348e9d4921..1f4cb41582 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -100,6 +100,9 @@ void RegisterAllocator::BlockRegister(Location location, interval->AddRange(start, end); } +// TODO: make the register allocator understand instructions like HCondition +// that may not need to be materialized. It doesn't need to allocate any +// registers for it. void RegisterAllocator::AllocateRegistersInternal() { number_of_registers_ = processing_core_registers_ ? codegen_->GetNumberOfCoreRegisters() diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index ac84d6ae40..d5225c1f73 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -30,11 +30,7 @@ namespace art { namespace arm { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - // NOTE: the assembler used here is ARM, not Thumb. This is because the address - // returned by this function is a pointer and for thumb we would have to set the - // bottom bit. It doesn't matter since the instructions generated are the same - // size anyway. - std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kArm))); + std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2))); switch (abi) { case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 703d68e0b3..92a9f533ea 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -329,7 +329,7 @@ void Thumb2Assembler::ldm(BlockAddressMode am, ++reg; } CHECK_LT(reg, 16); - CHECK(am == DB_W); // Only writeback is supported. + CHECK(am == IA_W); // Only writeback is supported. ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond); } else { EmitMultiMemOp(cond, am, true, base, regs); @@ -352,8 +352,8 @@ void Thumb2Assembler::stm(BlockAddressMode am, ++reg; } CHECK_LT(reg, 16); - CHECK(am == IA || am == IA_W); - Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset; + CHECK(am == DB || am == DB_W); + Address::Mode strmode = am == DB_W ? Address::PreIndex : Address::Offset; str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond); } else { EmitMultiMemOp(cond, am, false, base, regs); @@ -642,6 +642,7 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond, if (imm > (1 << 9)) { // 9 bit immediate. return true; } + return false; // 16 bit good. } else if (opcode == ADD && rd != SP && rn == SP) { // 10 bit immediate. 
if (imm > (1 << 10)) { return true; diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index a793513191..2d5514063f 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -367,12 +367,12 @@ class Dex2Oat { driver->CompileAll(class_loader, dex_files, &timings); - timings.NewSplit("dex2oat OatWriter"); + TimingLogger::ScopedTiming t2("dex2oat OatWriter", &timings); std::string image_file_location; uint32_t image_file_location_oat_checksum = 0; uintptr_t image_file_location_oat_data_begin = 0; if (!driver->IsImage()) { - TimingLogger::ScopedSplit split("Loading image checksum", &timings); + TimingLogger::ScopedTiming t3("Loading image checksum", &timings); gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum(); image_file_location_oat_data_begin = @@ -380,14 +380,13 @@ class Dex2Oat { image_file_location = image_space->GetImageFilename(); } - OatWriter oat_writer(dex_files, - image_file_location_oat_checksum, + OatWriter oat_writer(dex_files, image_file_location_oat_checksum, image_file_location_oat_data_begin, image_file_location, driver.get(), &timings); - TimingLogger::ScopedSplit split("Writing ELF", &timings); + t2.NewTiming("Writing ELF"); if (!driver->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) { LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath(); return nullptr; @@ -748,6 +747,7 @@ void CheckExplicitCheckOptions(InstructionSet isa, bool* explicit_null_checks, bool* explicit_so_checks, bool* explicit_suspend_checks) { switch (isa) { case kArm: + case kThumb2: break; // All checks implemented, leave as is. default: // No checks implemented, reset all to explicit checks. @@ -1039,8 +1039,8 @@ static int dex2oat(int argc, char** argv) { } else { Usage("--implicit-checks passed non-recognized value %s", val.c_str()); } - has_explicit_checks_options = true; } + has_explicit_checks_options = true; } else { Usage("Unknown argument %s", option.data()); } @@ -1076,7 +1076,7 @@ static int dex2oat(int argc, char** argv) { bool image = (!image_filename.empty()); if (!image && boot_image_filename.empty()) { - boot_image_filename += GetAndroidRoot(); + boot_image_filename += android_root; boot_image_filename += "/framework/boot.art"; } std::string boot_image_option; @@ -1170,6 +1170,7 @@ static int dex2oat(int argc, char** argv) { CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks, &explicit_suspend_checks); + LOG(INFO) << "init compiler options for explicit null: " << explicit_null_checks; CompilerOptions compiler_options(compiler_filter, huge_method_threshold, large_method_threshold, @@ -1211,7 +1212,7 @@ static int dex2oat(int argc, char** argv) { return EXIT_FAILURE; } - timings.StartSplit("dex2oat Setup"); + timings.StartTiming("dex2oat Setup"); LOG(INFO) << CommandLine(); Runtime::Options runtime_options; @@ -1256,7 +1257,17 @@ static int dex2oat(int argc, char** argv) { // TODO: Not sure whether it's a good idea to allow anything else but the runtime option in // this case at all, as we'll have to throw away produced code for a mismatch. 
if (!has_explicit_checks_options) { - if (instruction_set == kRuntimeISA) { + bool cross_compiling = true; + switch (kRuntimeISA) { + case kArm: + case kThumb2: + cross_compiling = instruction_set != kArm && instruction_set != kThumb2; + break; + default: + cross_compiling = instruction_set != kRuntimeISA; + break; + } + if (!cross_compiling) { Runtime* runtime = Runtime::Current(); compiler_options.SetExplicitNullChecks(runtime->ExplicitNullChecks()); compiler_options.SetExplicitStackOverflowChecks(runtime->ExplicitStackOverflowChecks()); @@ -1436,7 +1447,7 @@ static int dex2oat(int argc, char** argv) { // Elf32_Phdr.p_vaddr values by the desired base address. // if (image) { - timings.NewSplit("dex2oat ImageWriter"); + TimingLogger::ScopedTiming t("dex2oat ImageWriter", &timings); bool image_creation_success = dex2oat->CreateImageFile(image_filename, image_base, oat_unstripped, @@ -1449,6 +1460,7 @@ static int dex2oat(int argc, char** argv) { } if (is_host) { + timings.EndTiming(); if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) { LOG(INFO) << Dumpable<TimingLogger>(timings); } @@ -1461,7 +1473,7 @@ static int dex2oat(int argc, char** argv) { // If we don't want to strip in place, copy from unstripped location to stripped location. // We need to strip after image creation because FixupElf needs to use .strtab. if (oat_unstripped != oat_stripped) { - timings.NewSplit("dex2oat OatFile copy"); + TimingLogger::ScopedTiming t("dex2oat OatFile copy", &timings); oat_file.reset(); std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped.c_str())); std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped.c_str())); @@ -1496,7 +1508,7 @@ static int dex2oat(int argc, char** argv) { } #endif // ART_USE_PORTABLE_COMPILER - timings.EndSplit(); + timings.EndTiming(); if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) { LOG(INFO) << Dumpable<TimingLogger>(timings); diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 4e4a512713..1f565e504a 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -269,18 +269,34 @@ void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) { uint32_t op = (instruction >> 21) & 0xf; opcode = kDataProcessingOperations[op]; bool implicit_s = ((op & ~3) == 8); // TST, TEQ, CMP, and CMN. - if (implicit_s) { - // Rd is unused (and not shown), and we don't show the 's' suffix either. - } else { + bool is_mov = op == 0b1101 || op == 0b1111; + if (is_mov) { + // Show only Rd and Rm. if (s) { - suffixes += 's'; - } - args << ArmRegister(instruction, 12) << ", "; - } - if (i) { - args << ArmRegister(instruction, 16) << ", " << ShiftedImmediate(instruction); + suffixes += 's'; + } + args << ArmRegister(instruction, 12) << ", "; + if (i) { + args << ShiftedImmediate(instruction); + } else { + // TODO: Shifted register. + args << ArmRegister(instruction, 16) << ", " << ArmRegister(instruction, 0); + } } else { - args << Rm(instruction); + if (implicit_s) { + // Rd is unused (and not shown), and we don't show the 's' suffix either. + } else { + if (s) { + suffixes += 's'; + } + args << ArmRegister(instruction, 12) << ", "; + } + if (i) { + args << ArmRegister(instruction, 16) << ", " << ShiftedImmediate(instruction); + } else { + // TODO: Shifted register. 
+ args << ArmRegister(instruction, 16) << ", " << ArmRegister(instruction, 0); + } } } break; @@ -1291,7 +1307,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) int32_t imm32 = (imm8 << 24) >> 24; // sign-extend imm8 if (Rn.r == 13 && P == 1 && U == 0 && W == 1 && imm32 == 4) { opcode << "push"; - args << Rt; + args << "{" << Rt << "}"; } else if (Rn.r == 15 || (P == 0 && W == 0)) { opcode << "UNDEFINED"; } else { @@ -1443,10 +1459,33 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } args << "]"; } else { - // LDRT Rt, [Rn, #imm8] - 111 11 00 00 101 nnnn tttt 1110iiiiiiii - uint32_t imm8 = instr & 0xFF; - opcode << "ldrt"; - args << Rt << ", [" << Rn << ", #" << imm8 << "]"; + bool p = (instr & (1 << 10)) != 0; + bool w = (instr & (1 << 8)) != 0; + bool u = (instr & (1 << 9)) != 0; + if (p && u && !w) { + // LDRT Rt, [Rn, #imm8] - 111 11 00 00 101 nnnn tttt 1110iiiiiiii + uint32_t imm8 = instr & 0xFF; + opcode << "ldrt"; + args << Rt << ", [" << Rn << ", #" << imm8 << "]"; + } else if (Rn.r == 13 && !p && u && w && (instr & 0xff) == 4) { + // POP + opcode << "pop"; + args << "{" << Rt << "}"; + } else { + bool wback = !p || w; + uint32_t offset = (instr & 0xff); + opcode << "ldr.w"; + args << Rt << ","; + if (p && !wback) { + args << "[" << Rn << ", #" << offset << "]"; + } else if (p && wback) { + args << "[" << Rn << ", #" << offset << "]!"; + } else if (!p && wback) { + args << "[" << Rn << "], #" << offset; + } else { + LOG(FATAL) << p << " " << w; + } + } } break; } diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index e6a6860626..b012bc1cc1 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -56,10 +56,16 @@ static const char* gReg64Names[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; +// 64-bit opcode REX modifier. +constexpr uint8_t REX_W = 0b1000; +constexpr uint8_t REX_R = 0b0100; +constexpr uint8_t REX_X = 0b0010; +constexpr uint8_t REX_B = 0b0001; + static void DumpReg0(std::ostream& os, uint8_t rex, size_t reg, bool byte_operand, uint8_t size_override) { DCHECK_LT(reg, (rex == 0) ? 8u : 16u); - bool rex_w = (rex & 0b1000) != 0; + bool rex_w = (rex & REX_W) != 0; if (byte_operand) { os << ((rex == 0) ? gReg8Names[reg] : gExtReg8Names[reg]); } else if (rex_w) { @@ -86,14 +92,14 @@ static void DumpAnyReg(std::ostream& os, uint8_t rex, size_t reg, static void DumpReg(std::ostream& os, uint8_t rex, uint8_t reg, bool byte_operand, uint8_t size_override, RegFile reg_file) { - bool rex_r = (rex & 0b0100) != 0; + bool rex_r = (rex & REX_R) != 0; size_t reg_num = rex_r ? (reg + 8) : reg; DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file); } static void DumpRmReg(std::ostream& os, uint8_t rex, uint8_t reg, bool byte_operand, uint8_t size_override, RegFile reg_file) { - bool rex_b = (rex & 0b0001) != 0; + bool rex_b = (rex & REX_B) != 0; size_t reg_num = rex_b ? (reg + 8) : reg; DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file); } @@ -107,19 +113,19 @@ static void DumpAddrReg(std::ostream& os, uint8_t rex, uint8_t reg) { } static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg) { - bool rex_b = (rex & 0b0001) != 0; + bool rex_b = (rex & REX_B) != 0; size_t reg_num = rex_b ? (reg + 8) : reg; DumpAddrReg(os, rex, reg_num); } static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg) { - bool rex_x = (rex & 0b0010) != 0; + bool rex_x = (rex & REX_X) != 0; uint8_t reg_num = rex_x ? 
(reg + 8) : reg; DumpAddrReg(os, rex, reg_num); } static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg) { - bool rex_b = (rex & 0b0001) != 0; + bool rex_b = (rex & REX_B) != 0; size_t reg_num = rex_b ? (reg + 8) : reg; DumpReg0(os, rex, reg_num, false, 0); } @@ -896,6 +902,7 @@ DISASSEMBLER_ENTRY(cmp, case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: opcode << "mov"; immediate_bytes = 1; + byte_operand = true; reg_in_opcode = true; break; case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: @@ -916,6 +923,15 @@ DISASSEMBLER_ENTRY(cmp, byte_operand = (*instr == 0xC0); break; case 0xC3: opcode << "ret"; break; + case 0xC6: + static const char* c6_opcodes[] = {"mov", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6"}; + modrm_opcodes = c6_opcodes; + store = true; + immediate_bytes = 1; + has_modrm = true; + reg_is_opcode = true; + byte_operand = true; + break; case 0xC7: static const char* c7_opcodes[] = {"mov", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7"}; modrm_opcodes = c7_opcodes; @@ -1064,6 +1080,16 @@ DISASSEMBLER_ENTRY(cmp, if (reg_is_opcode && modrm_opcodes != NULL) { opcode << modrm_opcodes[reg_or_opcode]; } + + // Add opcode suffixes to indicate size. + if (byte_operand) { + opcode << 'b'; + } else if ((rex & REX_W) != 0) { + opcode << 'q'; + } else if (prefix[2] == 0x66) { + opcode << 'w'; + } + if (load) { if (!reg_is_opcode) { DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2], dst_reg_file); diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h index 7c0999992e..4c18ce405c 100644 --- a/runtime/base/histogram-inl.h +++ b/runtime/base/histogram-inl.h @@ -164,18 +164,18 @@ inline void Histogram<Value>::PrintBins(std::ostream& os, const CumulativeData& template <class Value> inline void Histogram<Value>::PrintConfidenceIntervals(std::ostream &os, double interval, const CumulativeData& data) const { + static constexpr size_t kFractionalDigits = 3; DCHECK_GT(interval, 0); DCHECK_LT(interval, 1.0); - - double per_0 = (1.0 - interval) / 2.0; - double per_1 = per_0 + interval; - TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust); - os << Name() << ":\tSum: "; - os << PrettyDuration(Sum() * kAdjust) << " "; - os << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit); - os << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit) << " "; - os << "Avg: " << FormatDuration(Mean() * kAdjust, unit) << " Max: "; - os << FormatDuration(Max() * kAdjust, unit) << "\n"; + const double per_0 = (1.0 - interval) / 2.0; + const double per_1 = per_0 + interval; + const TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust); + os << Name() << ":\tSum: " << PrettyDuration(Sum() * kAdjust) << " " + << (interval * 100) << "% C.I. 
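// Sketch (assumption, for illustration; relies on the REX_* constants defined above and
// <cstdint>): how the named REX bits widen a 3-bit ModRM field onto the 16 x86-64 registers,
// and how the new suffix logic picks an AT&T-style size letter. REX_B extends rm/base/opcode
// registers, REX_R the reg field, REX_X the SIB index, and REX_W selects 64-bit operands.
static size_t ExtendWithRex(uint8_t rex, uint8_t modrm_field, uint8_t rex_bit) {
  return ((rex & rex_bit) != 0) ? (modrm_field + 8) : modrm_field;  // e.g. field 1 + REX_R -> r9
}
static char SizeSuffix(bool byte_operand, uint8_t rex, uint8_t operand_size_prefix) {
  if (byte_operand) return 'b';                 // 8-bit operand, e.g. movb
  if ((rex & REX_W) != 0) return 'q';           // 64-bit operand, e.g. movq
  if (operand_size_prefix == 0x66) return 'w';  // 16-bit operand, e.g. movw
  return 'l';  // default 32-bit case; the disassembler above simply prints no suffix here
}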
" << FormatDuration(Percentile(per_0, data) * kAdjust, unit, + kFractionalDigits) + << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " " + << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: " + << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << "\n"; } template <class Value> diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc index a1550028e9..b6a2aaf33b 100644 --- a/runtime/base/timing_logger.cc +++ b/runtime/base/timing_logger.cc @@ -33,6 +33,8 @@ namespace art { constexpr size_t CumulativeLogger::kLowMemoryBucketCount; constexpr size_t CumulativeLogger::kDefaultBucketCount; +constexpr size_t TimingLogger::kIndexNotFound; + CumulativeLogger::CumulativeLogger(const std::string& name) : name_(name), lock_name_("CumulativeLoggerLock" + name), @@ -66,10 +68,12 @@ void CumulativeLogger::Reset() { void CumulativeLogger::AddLogger(const TimingLogger &logger) { MutexLock mu(Thread::Current(), lock_); - for (const TimingLogger::SplitTiming& split : logger.GetSplits()) { - uint64_t split_time = split.first; - const char* split_name = split.second; - AddPair(split_name, split_time); + TimingLogger::TimingData timing_data(logger.CalculateTimingData()); + const std::vector<TimingLogger::Timing>& timings = logger.GetTimings(); + for (size_t i = 0; i < timings.size(); ++i) { + if (timings[i].IsStartTiming()) { + AddPair(timings[i].GetName(), timing_data.GetExclusiveTime(i)); + } } ++iterations_; } @@ -124,166 +128,125 @@ void CumulativeLogger::DumpHistogram(std::ostream &os) const { } TimingLogger::TimingLogger(const char* name, bool precise, bool verbose) - : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) { + : name_(name), precise_(precise), verbose_(verbose) { } void TimingLogger::Reset() { - current_split_ = NULL; - splits_.clear(); + timings_.clear(); } -void TimingLogger::StartSplit(const char* new_split_label) { - DCHECK(new_split_label != nullptr) << "Starting split with null label."; - TimingLogger::ScopedSplit* explicit_scoped_split = - new TimingLogger::ScopedSplit(new_split_label, this); - explicit_scoped_split->explicit_ = true; +void TimingLogger::StartTiming(const char* label) { + DCHECK(label != nullptr); + timings_.push_back(Timing(NanoTime(), label)); + ATRACE_BEGIN(label); } -void TimingLogger::EndSplit() { - CHECK(current_split_ != nullptr) << "Ending a non-existent split."; - DCHECK(current_split_->label_ != nullptr); - DCHECK(current_split_->explicit_ == true) - << "Explicitly ending scoped split: " << current_split_->label_; - delete current_split_; - // TODO: current_split_ = nullptr; +void TimingLogger::EndTiming() { + timings_.push_back(Timing(NanoTime(), nullptr)); + ATRACE_END(); } -// Ends the current split and starts the one given by the label. 
-void TimingLogger::NewSplit(const char* new_split_label) { - if (current_split_ == nullptr) { - StartSplit(new_split_label); - } else { - DCHECK(new_split_label != nullptr) << "New split (" << new_split_label << ") with null label."; - current_split_->TailInsertSplit(new_split_label); +uint64_t TimingLogger::GetTotalNs() const { + if (timings_.size() < 2) { + return 0; } + return timings_.back().GetTime() - timings_.front().GetTime(); } -uint64_t TimingLogger::GetTotalNs() const { - uint64_t total_ns = 0; - for (const TimingLogger::SplitTiming& split : splits_) { - total_ns += split.first; +size_t TimingLogger::FindTimingIndex(const char* name, size_t start_idx) const { + DCHECK_LT(start_idx, timings_.size()); + for (size_t i = start_idx; i < timings_.size(); ++i) { + if (timings_[i].IsStartTiming() && strcmp(timings_[i].GetName(), name) == 0) { + return i; + } } - return total_ns; + return kIndexNotFound; +} + +TimingLogger::TimingData TimingLogger::CalculateTimingData() const { + TimingLogger::TimingData ret; + ret.data_.resize(timings_.size()); + std::vector<size_t> open_stack; + for (size_t i = 0; i < timings_.size(); ++i) { + if (timings_[i].IsEndTiming()) { + CHECK(!open_stack.empty()) << "No starting split for ending split at index " << i; + size_t open_idx = open_stack.back(); + uint64_t time = timings_[i].GetTime() - timings_[open_idx].GetTime(); + ret.data_[open_idx].exclusive_time += time; + DCHECK_EQ(ret.data_[open_idx].total_time, 0U); + ret.data_[open_idx].total_time += time; + // Each open split has exactly one end. + open_stack.pop_back(); + // If there is a parent node, subtract from the exclusive time. + if (!open_stack.empty()) { + // Note this may go negative, but will work due to 2s complement when we add the value + // total time value later. + ret.data_[open_stack.back()].exclusive_time -= time; + } + } else { + open_stack.push_back(i); + } + } + CHECK(open_stack.empty()) << "Missing ending for timing " + << timings_[open_stack.back()].GetName() << " at index " << open_stack.back(); + return ret; // No need to fear, C++11 move semantics are here. } -void TimingLogger::Dump(std::ostream &os) const { +void TimingLogger::Dump(std::ostream &os, const char* indent_string) const { + static constexpr size_t kFractionalDigits = 3; + TimingLogger::TimingData timing_data(CalculateTimingData()); uint64_t longest_split = 0; - uint64_t total_ns = 0; - for (const SplitTiming& split : splits_) { - uint64_t split_time = split.first; - longest_split = std::max(longest_split, split_time); - total_ns += split_time; + for (size_t i = 0; i < timings_.size(); ++i) { + longest_split = std::max(longest_split, timing_data.GetTotalTime(i)); } // Compute which type of unit we will use for printing the timings. TimeUnit tu = GetAppropriateTimeUnit(longest_split); uint64_t divisor = GetNsToTimeUnitDivisor(tu); + uint64_t mod_fraction = divisor >= 1000 ? divisor / 1000 : 1; // Print formatted splits. - for (const SplitTiming& split : splits_) { - uint64_t split_time = split.first; - if (!precise_ && divisor >= 1000) { - // Make the fractional part 0. - split_time -= split_time % (divisor / 1000); + size_t tab_count = 1; + os << name_ << " [Exclusive time] [Total time]\n"; + for (size_t i = 0; i < timings_.size(); ++i) { + if (timings_[i].IsStartTiming()) { + uint64_t exclusive_time = timing_data.GetExclusiveTime(i); + uint64_t total_time = timing_data.GetTotalTime(i); + if (!precise_) { + // Make the fractional part 0. 
+ exclusive_time -= exclusive_time % mod_fraction; + total_time -= total_time % mod_fraction; + } + for (size_t j = 0; j < tab_count; ++j) { + os << indent_string; + } + os << FormatDuration(exclusive_time, tu, kFractionalDigits); + // If they are the same, just print one value to prevent spam. + if (exclusive_time != total_time) { + os << "/" << FormatDuration(total_time, tu, kFractionalDigits); + } + os << " " << timings_[i].GetName() << "\n"; + ++tab_count; + } else { + --tab_count; } - os << name_ << ": " << std::setw(8) << FormatDuration(split_time, tu) << " " - << split.second << "\n"; } - os << name_ << ": end, " << NsToMs(total_ns) << " ms\n"; + os << name_ << ": end, " << PrettyDuration(GetTotalNs()) << "\n"; } -TimingLogger::ScopedSplit::ScopedSplit(const char* label, TimingLogger* timing_logger) { - DCHECK(label != NULL) << "New scoped split (" << label << ") with null label."; - CHECK(timing_logger != NULL) << "New scoped split (" << label << ") without TimingLogger."; - timing_logger_ = timing_logger; - label_ = label; - running_ns_ = 0; - explicit_ = false; - - // Stash away the current split and pause it. - enclosing_split_ = timing_logger->current_split_; - if (enclosing_split_ != NULL) { - enclosing_split_->Pause(); - } - - timing_logger_->current_split_ = this; - - ATRACE_BEGIN(label_); - - start_ns_ = NanoTime(); - if (timing_logger_->verbose_) { - LOG(INFO) << "Begin: " << label_; - } -} - -TimingLogger::ScopedSplit::~ScopedSplit() { - uint64_t current_time = NanoTime(); - uint64_t split_time = current_time - start_ns_; - running_ns_ += split_time; - ATRACE_END(); - - if (timing_logger_->verbose_) { - LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time); - } - - // If one or more enclosed explicitly started splits are not terminated we can - // either fail or "unwind" the stack of splits in the timing logger to 'this' - // (by deleting the intervening scoped splits). This implements the latter. - TimingLogger::ScopedSplit* current = timing_logger_->current_split_; - while ((current != NULL) && (current != this)) { - delete current; - current = timing_logger_->current_split_; - } - - CHECK(current != NULL) << "Missing scoped split (" << this->label_ - << ") in timing logger (" << timing_logger_->name_ << ")."; - CHECK(timing_logger_->current_split_ == this); - - timing_logger_->splits_.push_back(SplitTiming(running_ns_, label_)); - - timing_logger_->current_split_ = enclosing_split_; - if (enclosing_split_ != NULL) { - enclosing_split_->Resume(); +void TimingLogger::Verify() { + size_t counts[2] = { 0 }; + for (size_t i = 0; i < timings_.size(); ++i) { + if (i > 0) { + CHECK_LE(timings_[i - 1].GetTime(), timings_[i].GetTime()); + } + ++counts[timings_[i].IsStartTiming() ? 0 : 1]; } + CHECK_EQ(counts[0], counts[1]) << "Number of StartTiming and EndTiming doesn't match"; } - -void TimingLogger::ScopedSplit::TailInsertSplit(const char* label) { - // Sleight of hand here: Rather than embedding a new scoped split, we're updating the current - // scoped split in place. Basically, it's one way to make explicit and scoped splits compose - // well while maintaining the current semantics of NewSplit. An alternative is to push a new split - // since we unwind the stack of scoped splits in the scoped split destructor. However, this implies - // that the current split is not ended by NewSplit (which calls TailInsertSplit), which would - // be different from what we had before. 
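// Standalone sketch (illustration only; Event and the sample data are made up) of the pass
// CalculateTimingData() performs above: timings are stored as a flat stream of start/end
// records, and a stack of open indices yields each timing's total time and its exclusive
// time (total minus the totals of its direct children).
#include <cstdint>
#include <utility>
#include <vector>

struct Event { uint64_t time; const char* name; };  // name == nullptr marks an end record.

std::vector<std::pair<uint64_t, uint64_t>> TotalsAndExclusives(const std::vector<Event>& events) {
  std::vector<std::pair<uint64_t, uint64_t>> out(events.size(), {0u, 0u});  // {total, exclusive}
  std::vector<size_t> open;
  for (size_t i = 0; i < events.size(); ++i) {
    if (events[i].name != nullptr) { open.push_back(i); continue; }
    size_t start = open.back();
    open.pop_back();
    uint64_t dt = events[i].time - events[start].time;
    out[start].first = dt;    // total time of the timing opened at 'start'
    out[start].second += dt;  // exclusive time; child totals were already subtracted below
    if (!open.empty()) {
      out[open.back()].second -= dt;  // may wrap, repaired when the parent's own dt is added
    }
  }
  return out;
}
// E.g. Start("GC")@0, Start("Sweep")@2, End@7, End@10 gives total(GC)=10, total(Sweep)=5,
// exclusive(GC)=5 -- the same numbers TimingLogger::Dump prints as "[Exclusive time] [Total time]".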
- - uint64_t current_time = NanoTime(); - uint64_t split_time = current_time - start_ns_; - ATRACE_END(); - timing_logger_->splits_.push_back(std::pair<uint64_t, const char*>(split_time, label_)); - - if (timing_logger_->verbose_) { - LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time) << "\n" - << "Begin: " << label; +TimingLogger::~TimingLogger() { + if (kIsDebugBuild) { + Verify(); } - - label_ = label; - start_ns_ = current_time; - running_ns_ = 0; - - ATRACE_BEGIN(label); -} - -void TimingLogger::ScopedSplit::Pause() { - uint64_t current_time = NanoTime(); - uint64_t split_time = current_time - start_ns_; - running_ns_ += split_time; - ATRACE_END(); -} - - -void TimingLogger::ScopedSplit::Resume() { - uint64_t current_time = NanoTime(); - - start_ns_ = current_time; - ATRACE_BEGIN(label_); } } // namespace art diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h index 9b558980c5..b300109e31 100644 --- a/runtime/base/timing_logger.h +++ b/runtime/base/timing_logger.h @@ -77,93 +77,119 @@ class CumulativeLogger { // A timing logger that knows when a split starts for the purposes of logging tools, like systrace. class TimingLogger { public: - // Splits are nanosecond times and split names. - typedef std::pair<uint64_t, const char*> SplitTiming; - typedef std::vector<SplitTiming> SplitTimings; + static constexpr size_t kIndexNotFound = static_cast<size_t>(-1); - explicit TimingLogger(const char* name, bool precise, bool verbose); - ~TimingLogger() { - // TODO: DCHECK(current_split_ == nullptr) << "Forgot to end split: " << current_split_->label_; - } - // Clears current splits and labels. - void Reset(); + class Timing { + public: + Timing(uint64_t time, const char* name) : time_(time), name_(name) { + } + bool IsStartTiming() const { + return !IsEndTiming(); + } + bool IsEndTiming() const { + return name_ == nullptr; + } + uint64_t GetTime() const { + return time_; + } + const char* GetName() const { + return name_; + } - // Starts a split - void StartSplit(const char* new_split_label); + private: + uint64_t time_; + const char* name_; + }; - // Ends the current split and starts the one given by the label. - void NewSplit(const char* new_split_label); + // Extra data that is only calculated when you call dump to prevent excess allocation. + class TimingData { + public: + TimingData() = default; + TimingData(TimingData&& other) { + std::swap(data_, other.data_); + } + TimingData& operator=(TimingData&& other) { + std::swap(data_, other.data_); + return *this; + } + uint64_t GetTotalTime(size_t idx) { + return data_[idx].total_time; + } + uint64_t GetExclusiveTime(size_t idx) { + return data_[idx].exclusive_time; + } - // Ends the current split and records the end time. - void EndSplit(); + private: + // Each begin split has a total time and exclusive time. Exclusive time is total time - total + // time of children nodes. + struct CalculatedDataPoint { + CalculatedDataPoint() : total_time(0), exclusive_time(0) {} + uint64_t total_time; + uint64_t exclusive_time; + }; + std::vector<CalculatedDataPoint> data_; + friend class TimingLogger; + }; + explicit TimingLogger(const char* name, bool precise, bool verbose); + ~TimingLogger(); + // Verify that all open timings have related closed timings. + void Verify(); + // Clears current timings and labels. + void Reset(); + // Starts a timing. + void StartTiming(const char* new_split_label); + // Ends the current timing. + void EndTiming(); + // End the current timing and start a new timing. 
Usage not recommended. + void NewTiming(const char* new_split_label) { + EndTiming(); + StartTiming(new_split_label); + } + // Returns the total duration of the timings (sum of total times). uint64_t GetTotalNs() const; - - void Dump(std::ostream& os) const; + // Find the index of a timing by name. + size_t FindTimingIndex(const char* name, size_t start_idx) const; + void Dump(std::ostream& os, const char* indent_string = " ") const; // Scoped timing splits that can be nested and composed with the explicit split // starts and ends. - class ScopedSplit { - public: - explicit ScopedSplit(const char* label, TimingLogger* timing_logger); - - ~ScopedSplit(); - - friend class TimingLogger; - - private: - // Pauses timing of the split, usually due to nesting of another split. - void Pause(); - - // Resumes timing of the split, usually because a nested split has ended. - void Resume(); - - // Used by new split to swap splits in place in a ScopedSplit instance. - void TailInsertSplit(const char* label); - - // The scoped split immediately enclosing this split. Essentially, we get a - // stack of nested splits through this field. - ScopedSplit* enclosing_split_; - - // Was this created via TimingLogger's StartSplit? - bool explicit_; - - // The split's name. - const char* label_; - - // The current split's latest start time. (It may have been paused and restarted.) - uint64_t start_ns_; - - // The running time, outside of pauses. - uint64_t running_ns_; - - // The timing logger holding this split. - TimingLogger* timing_logger_; + class ScopedTiming { + public: + explicit ScopedTiming(const char* label, TimingLogger* logger) : logger_(logger) { + logger_->StartTiming(label); + } + ~ScopedTiming() { + logger_->EndTiming(); + } + // Closes the current timing and opens a new timing. + void NewTiming(const char* label) { + logger_->NewTiming(label); + } - DISALLOW_COPY_AND_ASSIGN(ScopedSplit); + private: + TimingLogger* const logger_; // The timing logger which the scoped timing is associated with. + DISALLOW_COPY_AND_ASSIGN(ScopedTiming); }; - const SplitTimings& GetSplits() const { - return splits_; + // Return the time points of when each start / end timings start and finish. + const std::vector<Timing>& GetTimings() const { + return timings_; } - friend class ScopedSplit; + TimingData CalculateTimingData() const; + protected: // The name of the timing logger. const char* const name_; - // Do we want to print the exactly recorded split (true) or round down to the time unit being // used (false). const bool precise_; - // Verbose logging. const bool verbose_; - - // The current scoped split is also the 'top' of the stack of splits in progress. - ScopedSplit* current_split_; - - // Splits that have ended. - SplitTimings splits_; + // Timing points that are either start or end points. For each starting point ret[i] = location + // of end split associated with i. If it is and end split ret[i] = i. + std::vector<Timing> timings_; private: DISALLOW_COPY_AND_ASSIGN(TimingLogger); diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc index 0757751822..35a73d0a76 100644 --- a/runtime/base/timing_logger_test.cc +++ b/runtime/base/timing_logger_test.cc @@ -26,16 +26,14 @@ class TimingLoggerTest : public CommonRuntimeTest {}; TEST_F(TimingLoggerTest, StartEnd) { const char* split1name = "First Split"; - TimingLogger timings("StartEnd", true, false); - - timings.StartSplit(split1name); - - timings.EndSplit(); // Ends split1. 
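// Usage sketch (hypothetical caller, not from the patch) of the replacement API declared
// above: ScopedTiming opens a timing in its constructor and closes it in its destructor, so
// nesting follows C++ scopes, and NewTiming() ends the current timing and immediately opens
// a sibling at the same depth.
void PhaseExample(TimingLogger* logger) {
  TimingLogger::ScopedTiming t("Phase", logger);       // StartTiming("Phase")
  {
    TimingLogger::ScopedTiming t2("SubStep", logger);  // nested timing
  }                                                    // EndTiming() for "SubStep"
  t.NewTiming("Cleanup");                              // ends "Phase", starts "Cleanup"
}                                                      // destructor ends "Cleanup"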
- - const TimingLogger::SplitTimings& splits = timings.GetSplits(); - - EXPECT_EQ(1U, splits.size()); - EXPECT_STREQ(splits[0].second, split1name); + TimingLogger logger("StartEnd", true, false); + logger.StartTiming(split1name); + logger.EndTiming(); // Ends split1. + const auto& timings = logger.GetTimings(); + EXPECT_EQ(2U, timings.size()); // Start, End splits + EXPECT_TRUE(timings[0].IsStartTiming()); + EXPECT_STREQ(timings[0].GetName(), split1name); + EXPECT_TRUE(timings[1].IsEndTiming()); } @@ -43,56 +41,61 @@ TEST_F(TimingLoggerTest, StartNewEnd) { const char* split1name = "First Split"; const char* split2name = "Second Split"; const char* split3name = "Third Split"; - TimingLogger timings("StartNewEnd", true, false); - - timings.StartSplit(split1name); - - timings.NewSplit(split2name); // Ends split1. - - timings.NewSplit(split3name); // Ends split2. - - timings.EndSplit(); // Ends split3. - - const TimingLogger::SplitTimings& splits = timings.GetSplits(); - - EXPECT_EQ(3U, splits.size()); - EXPECT_STREQ(splits[0].second, split1name); - EXPECT_STREQ(splits[1].second, split2name); - EXPECT_STREQ(splits[2].second, split3name); + TimingLogger logger("StartNewEnd", true, false); + logger.StartTiming(split1name); + logger.NewTiming(split2name); + logger.NewTiming(split3name); + logger.EndTiming(); + // Get the timings and verify that they are sane. + const auto& timings = logger.GetTimings(); + // 6 timings in the timing logger at this point. + EXPECT_EQ(6U, timings.size()); + EXPECT_TRUE(timings[0].IsStartTiming()); + EXPECT_STREQ(timings[0].GetName(), split1name); + EXPECT_TRUE(timings[1].IsEndTiming()); + EXPECT_TRUE(timings[2].IsStartTiming()); + EXPECT_STREQ(timings[2].GetName(), split2name); + EXPECT_TRUE(timings[3].IsEndTiming()); + EXPECT_TRUE(timings[4].IsStartTiming()); + EXPECT_STREQ(timings[4].GetName(), split3name); + EXPECT_TRUE(timings[5].IsEndTiming()); } TEST_F(TimingLoggerTest, StartNewEndNested) { - const char* split1name = "First Split"; - const char* split2name = "Second Split"; - const char* split3name = "Third Split"; - const char* split4name = "Fourth Split"; - const char* split5name = "Fifth Split"; - TimingLogger timings("StartNewEndNested", true, false); - - timings.StartSplit(split1name); - - timings.NewSplit(split2name); // Ends split1. - - timings.StartSplit(split3name); - - timings.StartSplit(split4name); - - timings.NewSplit(split5name); // Ends split4. - - timings.EndSplit(); // Ends split5. - - timings.EndSplit(); // Ends split3. - - timings.EndSplit(); // Ends split2. - - const TimingLogger::SplitTimings& splits = timings.GetSplits(); - - EXPECT_EQ(5U, splits.size()); - EXPECT_STREQ(splits[0].second, split1name); - EXPECT_STREQ(splits[1].second, split4name); - EXPECT_STREQ(splits[2].second, split5name); - EXPECT_STREQ(splits[3].second, split3name); - EXPECT_STREQ(splits[4].second, split2name); + const char* name1 = "First Split"; + const char* name2 = "Second Split"; + const char* name3 = "Third Split"; + const char* name4 = "Fourth Split"; + const char* name5 = "Fifth Split"; + TimingLogger logger("StartNewEndNested", true, false); + logger.StartTiming(name1); + logger.NewTiming(name2); // Ends timing1. + logger.StartTiming(name3); + logger.StartTiming(name4); + logger.NewTiming(name5); // Ends timing4. + logger.EndTiming(); // Ends timing5. + logger.EndTiming(); // Ends timing3. + logger.EndTiming(); // Ends timing2. 
+ const auto& timings = logger.GetTimings(); + EXPECT_EQ(10U, timings.size()); + size_t idx_1 = logger.FindTimingIndex(name1, 0); + size_t idx_2 = logger.FindTimingIndex(name2, 0); + size_t idx_3 = logger.FindTimingIndex(name3, 0); + size_t idx_4 = logger.FindTimingIndex(name4, 0); + size_t idx_5 = logger.FindTimingIndex(name5, 0); + size_t idx_6 = logger.FindTimingIndex("Not found", 0); + EXPECT_NE(idx_1, TimingLogger::kIndexNotFound); + EXPECT_NE(idx_2, TimingLogger::kIndexNotFound); + EXPECT_NE(idx_3, TimingLogger::kIndexNotFound); + EXPECT_NE(idx_4, TimingLogger::kIndexNotFound); + EXPECT_NE(idx_5, TimingLogger::kIndexNotFound); + EXPECT_EQ(idx_6, TimingLogger::kIndexNotFound); + TimingLogger::TimingData data = logger.CalculateTimingData(); + EXPECT_STREQ(timings[idx_1].GetName(), name1); + EXPECT_STREQ(timings[idx_2].GetName(), name2); + EXPECT_STREQ(timings[idx_3].GetName(), name3); + EXPECT_STREQ(timings[idx_4].GetName(), name4); + EXPECT_STREQ(timings[idx_5].GetName(), name5); } @@ -101,31 +104,32 @@ TEST_F(TimingLoggerTest, Scoped) { const char* innersplit1 = "Inner Split 1"; const char* innerinnersplit1 = "Inner Inner Split 1"; const char* innersplit2 = "Inner Split 2"; - TimingLogger timings("Scoped", true, false); - + TimingLogger logger("Scoped", true, false); { - TimingLogger::ScopedSplit outer(outersplit, &timings); - - { - TimingLogger::ScopedSplit inner1(innersplit1, &timings); - - { - TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings); - } // Ends innerinnersplit1. - } // Ends innersplit1. - + TimingLogger::ScopedTiming outer(outersplit, &logger); + { + TimingLogger::ScopedTiming inner1(innersplit1, &logger); { - TimingLogger::ScopedSplit inner2(innersplit2, &timings); - } // Ends innersplit2. + TimingLogger::ScopedTiming innerinner1(innerinnersplit1, &logger); + } // Ends innerinnersplit1. + } // Ends innersplit1. + { + TimingLogger::ScopedTiming inner2(innersplit2, &logger); + } // Ends innersplit2. } // Ends outersplit. - - const TimingLogger::SplitTimings& splits = timings.GetSplits(); - - EXPECT_EQ(4U, splits.size()); - EXPECT_STREQ(splits[0].second, innerinnersplit1); - EXPECT_STREQ(splits[1].second, innersplit1); - EXPECT_STREQ(splits[2].second, innersplit2); - EXPECT_STREQ(splits[3].second, outersplit); + const size_t idx_outersplit = logger.FindTimingIndex(outersplit, 0); + const size_t idx_innersplit1 = logger.FindTimingIndex(innersplit1, 0); + const size_t idx_innerinnersplit1 = logger.FindTimingIndex(innerinnersplit1, 0); + const size_t idx_innersplit2 = logger.FindTimingIndex(innersplit2, 0); + const auto& timings = logger.GetTimings(); + EXPECT_EQ(8U, timings.size()); // 4 start timings and 4 end timings. 
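// Note (illustration, restating the invariant these assertions rely on): every timing
// contributes exactly one start record and one end record, so the nested test above sees
// 5 + 5 = 10 entries and this one 4 + 4 = 8; for any parent timing,
// GetTotalTime(parent) >= GetTotalTime(child) and GetExclusiveTime(parent) equals
// total(parent) minus the sum of its direct children's totals, which is what the
// EXPECT_GE checks verify.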
+ EXPECT_GE(timings[idx_innerinnersplit1].GetTime(), timings[idx_innersplit1].GetTime()); + EXPECT_GE(timings[idx_innersplit2].GetTime(), timings[idx_innersplit1].GetTime()); + TimingLogger::TimingData data(logger.CalculateTimingData()); + EXPECT_GE(data.GetTotalTime(idx_outersplit), data.GetTotalTime(idx_innerinnersplit1)); + EXPECT_GE(data.GetTotalTime(idx_outersplit), + data.GetTotalTime(idx_innersplit1) + data.GetTotalTime(idx_innersplit2)); + EXPECT_GE(data.GetTotalTime(idx_innersplit1), data.GetTotalTime(idx_innerinnersplit1)); } @@ -134,27 +138,24 @@ TEST_F(TimingLoggerTest, ScopedAndExplicit) { const char* innersplit = "Inner Split"; const char* innerinnersplit1 = "Inner Inner Split 1"; const char* innerinnersplit2 = "Inner Inner Split 2"; - TimingLogger timings("Scoped", true, false); - - timings.StartSplit(outersplit); - + TimingLogger logger("Scoped", true, false); + logger.StartTiming(outersplit); { - TimingLogger::ScopedSplit inner(innersplit, &timings); - - timings.StartSplit(innerinnersplit1); - - timings.NewSplit(innerinnersplit2); // Ends innerinnersplit1. + TimingLogger::ScopedTiming inner(innersplit, &logger); + logger.StartTiming(innerinnersplit1); + logger.NewTiming(innerinnersplit2); // Ends innerinnersplit1. + logger.EndTiming(); } // Ends innerinnersplit2, then innersplit. - - timings.EndSplit(); // Ends outersplit. - - const TimingLogger::SplitTimings& splits = timings.GetSplits(); - - EXPECT_EQ(4U, splits.size()); - EXPECT_STREQ(splits[0].second, innerinnersplit1); - EXPECT_STREQ(splits[1].second, innerinnersplit2); - EXPECT_STREQ(splits[2].second, innersplit); - EXPECT_STREQ(splits[3].second, outersplit); + logger.EndTiming(); // Ends outersplit. + const size_t idx_outersplit = logger.FindTimingIndex(outersplit, 0); + const size_t idx_innersplit = logger.FindTimingIndex(innersplit, 0); + const size_t idx_innerinnersplit1 = logger.FindTimingIndex(innerinnersplit1, 0); + const size_t idx_innerinnersplit2 = logger.FindTimingIndex(innerinnersplit2, 0); + const auto& timings = logger.GetTimings(); + EXPECT_EQ(8U, timings.size()); + EXPECT_LE(timings[idx_outersplit].GetTime(), timings[idx_innersplit].GetTime()); + EXPECT_LE(timings[idx_innersplit].GetTime(), timings[idx_innerinnersplit1].GetTime()); + EXPECT_LE(timings[idx_innerinnersplit1].GetTime(), timings[idx_innerinnersplit2].GetTime()); } } // namespace art diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc index 6d5b59cbeb..f29a7ec974 100644 --- a/runtime/base/unix_file/fd_file.cc +++ b/runtime/base/unix_file/fd_file.cc @@ -122,7 +122,9 @@ bool FdFile::ReadFully(void* buffer, size_t byte_count) { char* ptr = static_cast<char*>(buffer); while (byte_count > 0) { ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count)); - if (bytes_read == -1) { + if (bytes_read <= 0) { + // 0: end of file + // -1: error return false; } byte_count -= bytes_read; // Reduce the number of remaining bytes. diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc index d620666747..33b3d3e186 100644 --- a/runtime/base/unix_file/fd_file_test.cc +++ b/runtime/base/unix_file/fd_file_test.cc @@ -16,6 +16,7 @@ #include "base/unix_file/fd_file.h" #include "base/unix_file/random_access_file_test.h" +#include "common_runtime_test.h" // For ScratchFile #include "gtest/gtest.h" namespace unix_file { @@ -60,4 +61,15 @@ TEST_F(FdFileTest, OpenClose) { EXPECT_TRUE(file.IsOpened()); } +TEST_F(FdFileTest, ReadFullyEmptyFile) { + // New scratch file, zero-length. 
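// Sketch (illustration only) of why ReadFully() above now checks bytes_read <= 0 rather
// than == -1: read(2) returns 0 at end-of-file, so the old loop never decremented
// byte_count on an empty or truncated file and spun forever. A minimal standalone reader
// with the corrected behaviour:
#include <unistd.h>
bool ReadFullyExample(int fd, void* buffer, size_t byte_count) {
  char* ptr = static_cast<char*>(buffer);
  while (byte_count > 0) {
    ssize_t bytes_read = read(fd, ptr, byte_count);  // ART wraps this in TEMP_FAILURE_RETRY
    if (bytes_read <= 0) {
      return false;  // 0: end of file reached early, -1: I/O error
    }
    byte_count -= static_cast<size_t>(bytes_read);
    ptr += bytes_read;
  }
  return true;
}
// The new ReadFullyEmptyFile test exercises exactly this case: reading 4 bytes from a
// zero-length scratch file must fail instead of hanging.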
+ art::ScratchFile tmp; + FdFile file; + ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDONLY)); + EXPECT_GE(file.Fd(), 0); + EXPECT_TRUE(file.IsOpened()); + uint8_t buffer[16]; + EXPECT_FALSE(file.ReadFully(&buffer, 4)); +} + } // namespace unix_file diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h index edba5020d4..b6810b02b2 100644 --- a/runtime/dex_instruction.h +++ b/runtime/dex_instruction.h @@ -145,28 +145,30 @@ class Instruction { }; enum VerifyFlag { - kVerifyNone = 0x000000, - kVerifyRegA = 0x000001, - kVerifyRegAWide = 0x000002, - kVerifyRegB = 0x000004, - kVerifyRegBField = 0x000008, - kVerifyRegBMethod = 0x000010, - kVerifyRegBNewInstance = 0x000020, - kVerifyRegBString = 0x000040, - kVerifyRegBType = 0x000080, - kVerifyRegBWide = 0x000100, - kVerifyRegC = 0x000200, - kVerifyRegCField = 0x000400, - kVerifyRegCNewArray = 0x000800, - kVerifyRegCType = 0x001000, - kVerifyRegCWide = 0x002000, - kVerifyArrayData = 0x004000, - kVerifyBranchTarget = 0x008000, - kVerifySwitchTargets = 0x010000, - kVerifyVarArg = 0x020000, - kVerifyVarArgRange = 0x040000, - kVerifyRuntimeOnly = 0x080000, - kVerifyError = 0x100000, + kVerifyNone = 0x000000, + kVerifyRegA = 0x000001, + kVerifyRegAWide = 0x000002, + kVerifyRegB = 0x000004, + kVerifyRegBField = 0x000008, + kVerifyRegBMethod = 0x000010, + kVerifyRegBNewInstance = 0x000020, + kVerifyRegBString = 0x000040, + kVerifyRegBType = 0x000080, + kVerifyRegBWide = 0x000100, + kVerifyRegC = 0x000200, + kVerifyRegCField = 0x000400, + kVerifyRegCNewArray = 0x000800, + kVerifyRegCType = 0x001000, + kVerifyRegCWide = 0x002000, + kVerifyArrayData = 0x004000, + kVerifyBranchTarget = 0x008000, + kVerifySwitchTargets = 0x010000, + kVerifyVarArg = 0x020000, + kVerifyVarArgNonZero = 0x040000, + kVerifyVarArgRange = 0x080000, + kVerifyVarArgRangeNonZero = 0x100000, + kVerifyRuntimeOnly = 0x200000, + kVerifyError = 0x400000, }; static constexpr uint32_t kMaxVarArgRegs = 5; @@ -506,7 +508,8 @@ class Instruction { int GetVerifyExtraFlags() const { return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget | - kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError)); + kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero | kVerifyVarArgRange | + kVerifyVarArgRangeNonZero | kVerifyError)); } bool GetVerifyIsRuntimeOnly() const { diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h index 4cda58b92e..103b0d74ef 100644 --- a/runtime/dex_instruction_list.h +++ b/runtime/dex_instruction_list.h @@ -128,17 +128,17 @@ V(0x6B, SPUT_BYTE, "sput-byte", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \ V(0x6C, SPUT_CHAR, "sput-char", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \ V(0x6D, SPUT_SHORT, "sput-short", k21c, false, kFieldRef, kContinue | kThrow | kStore | kRegBFieldOrConstant, kVerifyRegA | kVerifyRegBField) \ - V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \ - V(0x6F, INVOKE_SUPER, "invoke-super", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \ - V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \ + V(0x6E, INVOKE_VIRTUAL, "invoke-virtual", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \ + V(0x6F, 
INVOKE_SUPER, "invoke-super", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \ + V(0x70, INVOKE_DIRECT, "invoke-direct", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \ V(0x71, INVOKE_STATIC, "invoke-static", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \ - V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArg) \ + V(0x72, INVOKE_INTERFACE, "invoke-interface", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero) \ V(0x73, RETURN_VOID_BARRIER, "return-void-barrier", k10x, false, kNone, kReturn, kVerifyNone) \ - V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \ - V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \ - V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \ + V(0x74, INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \ + V(0x75, INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \ + V(0x76, INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \ V(0x77, INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \ - V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRange) \ + V(0x78, INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero) \ V(0x79, UNUSED_79, "unused-79", k10x, false, kUnknown, 0, kVerifyError) \ V(0x7A, UNUSED_7A, "unused-7a", k10x, false, kUnknown, 0, kVerifyError) \ V(0x7B, NEG_INT, "neg-int", k12x, true, kNone, kContinue, kVerifyRegA | kVerifyRegB) \ @@ -251,8 +251,8 @@ V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ - V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg | kVerifyRuntimeOnly) \ - V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange | kVerifyRuntimeOnly) \ + V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgNonZero | kVerifyRuntimeOnly) \ + V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, 
kVerifyVarArgRangeNonZero | kVerifyRuntimeOnly) \ V(0xEB, UNUSED_EB, "unused-eb", k10x, false, kUnknown, 0, kVerifyError) \ V(0xEC, UNUSED_EC, "unused-ec", k10x, false, kUnknown, 0, kVerifyError) \ V(0xED, UNUSED_ED, "unused-ed", k10x, false, kUnknown, 0, kVerifyError) \ diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index 55262f2359..09fb97a5c9 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -1642,7 +1642,7 @@ void RosAlloc::SetFootprintLimit(size_t new_capacity) { void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). - WriterMutexLock wmu(self, bulk_free_lock_); + ReaderMutexLock wmu(self, bulk_free_lock_); for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); @@ -1720,7 +1720,7 @@ void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { if (kIsDebugBuild) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). - WriterMutexLock wmu(self, bulk_free_lock_); + ReaderMutexLock wmu(self, bulk_free_lock_); for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); @@ -1867,7 +1867,7 @@ void RosAlloc::Verify() { CHECK(Locks::mutator_lock_->IsExclusiveHeld(self)) << "The mutator locks isn't exclusively locked at RosAlloc::Verify()"; MutexLock mu(self, *Locks::thread_list_lock_); - WriterMutexLock wmu(self, bulk_free_lock_); + ReaderMutexLock wmu(self, bulk_free_lock_); std::vector<Run*> runs; { MutexLock mu(self, lock_); diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index a439188858..13f61ec935 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -45,10 +45,7 @@ class RosAlloc { byte magic_num_; // The magic number used for debugging only. bool IsFree() const { - if (kIsDebugBuild) { - return magic_num_ == kMagicNumFree; - } - return true; + return !kIsDebugBuild || magic_num_ == kMagicNumFree; } size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) { const byte* fpr_base = reinterpret_cast<const byte*>(this); diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index a17c36be6d..46d79bf796 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -31,20 +31,36 @@ namespace art { namespace gc { namespace collector { +Iteration::Iteration() + : duration_ns_(0), timings_("GC iteration timing logger", true, VLOG_IS_ON(heap)) { + Reset(kGcCauseBackground, false); // Reset to some place holder values. +} + +void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) { + timings_.Reset(); + pause_times_.clear(); + duration_ns_ = 0; + clear_soft_references_ = clear_soft_references; + gc_cause_ = gc_cause; + freed_ = ObjectBytePair(); + freed_los_ = ObjectBytePair(); +} + +uint64_t Iteration::GetEstimatedThroughput() const { + // Add 1ms to prevent possible division by 0. 
+ return (static_cast<uint64_t>(freed_.bytes) * 1000) / (NsToMs(GetDurationNs()) + 1); +} + GarbageCollector::GarbageCollector(Heap* heap, const std::string& name) : heap_(heap), name_(name), - gc_cause_(kGcCauseForAlloc), - clear_soft_references_(false), - duration_ns_(0), - timings_(name_.c_str(), true, VLOG_IS_ON(heap)), pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount), cumulative_timings_(name) { ResetCumulativeStatistics(); } void GarbageCollector::RegisterPause(uint64_t nano_length) { - pause_times_.push_back(nano_length); + GetCurrentIteration()->pause_times_.push_back(nano_length); } void GarbageCollector::ResetCumulativeStatistics() { @@ -59,38 +75,33 @@ void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()).c_str()); Thread* self = Thread::Current(); uint64_t start_time = NanoTime(); - timings_.Reset(); - pause_times_.clear(); - duration_ns_ = 0; - clear_soft_references_ = clear_soft_references; - gc_cause_ = gc_cause; - // Reset stats. - freed_bytes_ = 0; - freed_large_object_bytes_ = 0; - freed_objects_ = 0; - freed_large_objects_ = 0; + Iteration* current_iteration = GetCurrentIteration(); + current_iteration->Reset(gc_cause, clear_soft_references); RunPhases(); // Run all the GC phases. // Add the current timings to the cumulative timings. - cumulative_timings_.AddLogger(timings_); + cumulative_timings_.AddLogger(*GetTimings()); // Update cumulative statistics with how many bytes the GC iteration freed. - total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects(); - total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes(); + total_freed_objects_ += current_iteration->GetFreedObjects() + + current_iteration->GetFreedLargeObjects(); + total_freed_bytes_ += current_iteration->GetFreedBytes() + + current_iteration->GetFreedLargeObjectBytes(); uint64_t end_time = NanoTime(); - duration_ns_ = end_time - start_time; + current_iteration->SetDurationNs(end_time - start_time); if (Locks::mutator_lock_->IsExclusiveHeld(self)) { // The entire GC was paused, clear the fake pauses which might be in the pause times and add // the whole GC duration. - pause_times_.clear(); - RegisterPause(duration_ns_); + current_iteration->pause_times_.clear(); + RegisterPause(current_iteration->GetDurationNs()); } - total_time_ns_ += GetDurationNs(); - for (uint64_t pause_time : pause_times_) { + total_time_ns_ += current_iteration->GetDurationNs(); + for (uint64_t pause_time : current_iteration->GetPauseTimes()) { pause_histogram_.AddValue(pause_time / 1000); } ATRACE_END(); } void GarbageCollector::SwapBitmaps() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps // these bitmaps. The bitmap swapping is an optimization so that we do not need to clear the live // bits of dead objects in the live bitmap. @@ -125,23 +136,6 @@ uint64_t GarbageCollector::GetEstimatedMeanThroughput() const { return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1); } -uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const { - // Add 1ms to prevent possible division by 0. 
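// Worked example (numbers invented) for Iteration::GetEstimatedThroughput() above: freeing
// freed_.bytes = 64 MiB during a 40 ms iteration gives
//   (64 * 1024 * 1024 * 1000) / (40 + 1) ≈ 1.6e9 bytes/s ≈ 1.5 GiB/s,
// and the "+ 1" keeps the divisor non-zero for iterations shorter than one millisecond,
// matching the per-collector helper this replaces.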
- return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1); -} - -void GarbageCollector::RecordFree(uint64_t freed_objects, int64_t freed_bytes) { - freed_objects_ += freed_objects; - freed_bytes_ += freed_bytes; - GetHeap()->RecordFree(freed_objects, freed_bytes); -} - -void GarbageCollector::RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes) { - freed_large_objects_ += freed_objects; - freed_large_object_bytes_ += freed_bytes; - GetHeap()->RecordFree(freed_objects, freed_bytes); -} - void GarbageCollector::ResetMeasurements() { cumulative_timings_.Reset(); pause_histogram_.Reset(); @@ -160,6 +154,23 @@ GarbageCollector::ScopedPause::~ScopedPause() { Runtime::Current()->GetThreadList()->ResumeAll(); } +// Returns the current GC iteration and assocated info. +Iteration* GarbageCollector::GetCurrentIteration() { + return heap_->GetCurrentGcIteration(); +} +const Iteration* GarbageCollector::GetCurrentIteration() const { + return heap_->GetCurrentGcIteration(); +} + +void GarbageCollector::RecordFree(const ObjectBytePair& freed) { + GetCurrentIteration()->freed_.Add(freed); + heap_->RecordFree(freed.objects, freed.bytes); +} +void GarbageCollector::RecordFreeLOS(const ObjectBytePair& freed) { + GetCurrentIteration()->freed_los_.Add(freed); + heap_->RecordFree(freed.objects, freed.bytes); +} + } // namespace collector } // namespace gc } // namespace art diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index f4f9dbb40a..885569efd9 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -33,6 +33,78 @@ class Heap; namespace collector { +struct ObjectBytePair { + ObjectBytePair(uint64_t num_objects = 0, int64_t num_bytes = 0) + : objects(num_objects), bytes(num_bytes) {} + void Add(const ObjectBytePair& other) { + objects += other.objects; + bytes += other.bytes; + } + // Number of objects which were freed. + uint64_t objects; + // Freed bytes are signed since the GC can free negative bytes if it promotes objects to a space + // which has a larger allocation size. + int64_t bytes; +}; + +// A information related single garbage collector iteration. Since we only ever have one GC running +// at any given time, we can have a single iteration info. +class Iteration { + public: + Iteration(); + // Returns how long the mutators were paused in nanoseconds. + const std::vector<uint64_t>& GetPauseTimes() const { + return pause_times_; + } + TimingLogger* GetTimings() { + return &timings_; + } + // Returns how long the GC took to complete in nanoseconds. + uint64_t GetDurationNs() const { + return duration_ns_; + } + int64_t GetFreedBytes() const { + return freed_.bytes; + } + int64_t GetFreedLargeObjectBytes() const { + return freed_los_.bytes; + } + uint64_t GetFreedObjects() const { + return freed_.objects; + } + uint64_t GetFreedLargeObjects() const { + return freed_los_.objects; + } + void Reset(GcCause gc_cause, bool clear_soft_references); + // Returns the estimated throughput of the iteration. 
+ uint64_t GetEstimatedThroughput() const; + bool GetClearSoftReferences() const { + return clear_soft_references_; + } + void SetClearSoftReferences(bool clear_soft_references) { + clear_soft_references_ = clear_soft_references; + } + GcCause GetGcCause() const { + return gc_cause_; + } + + private: + void SetDurationNs(uint64_t duration) { + duration_ns_ = duration; + } + + GcCause gc_cause_; + bool clear_soft_references_; + uint64_t duration_ns_; + TimingLogger timings_; + ObjectBytePair freed_; + ObjectBytePair freed_los_; + std::vector<uint64_t> pause_times_; + + friend class GarbageCollector; + DISALLOW_COPY_AND_ASSIGN(Iteration); +}; + class GarbageCollector { public: class SCOPED_LOCKABLE ScopedPause { @@ -62,22 +134,7 @@ class GarbageCollector { Heap* GetHeap() const { return heap_; } - - // Returns how long the mutators were paused in nanoseconds. - const std::vector<uint64_t>& GetPauseTimes() const { - return pause_times_; - } - - // Returns how long the GC took to complete in nanoseconds. - uint64_t GetDurationNs() const { - return duration_ns_; - } - void RegisterPause(uint64_t nano_length); - - TimingLogger& GetTimings() { - return timings_; - } const CumulativeLogger& GetCumulativeTimings() const { return cumulative_timings_; } @@ -87,52 +144,36 @@ class GarbageCollector { // Swap the live and mark bitmaps of spaces that are active for the collector. For partial GC, // this is the allocation space, for full GC then we swap the zygote bitmaps too. void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); - - int64_t GetFreedBytes() const { - return freed_bytes_; - } - - int64_t GetFreedLargeObjectBytes() const { - return freed_large_object_bytes_; - } - - uint64_t GetFreedObjects() const { - return freed_objects_; - } - - uint64_t GetFreedLargeObjects() const { - return freed_large_objects_; - } - uint64_t GetTotalPausedTimeNs() const { return pause_histogram_.AdjustedSum(); } - int64_t GetTotalFreedBytes() const { return total_freed_bytes_; } - uint64_t GetTotalFreedObjects() const { return total_freed_objects_; } - const Histogram<uint64_t>& GetPauseHistogram() const { return pause_histogram_; } - // Reset the cumulative timings and pause histogram. void ResetMeasurements(); - // Returns the estimated throughput in bytes / second. uint64_t GetEstimatedMeanThroughput() const; - - // Returns the estimated throughput of the last GC iteration. - uint64_t GetEstimatedLastIterationThroughput() const; - // Returns how many GC iterations have been run. - size_t GetIterations() const { + size_t NumberOfIterations() const { return GetCumulativeTimings().GetIterations(); } + // Returns the current GC iteration and assocated info. + Iteration* GetCurrentIteration(); + const Iteration* GetCurrentIteration() const; + TimingLogger* GetTimings() { + return &GetCurrentIteration()->timings_; + } + // Record a free of normal objects. + void RecordFree(const ObjectBytePair& freed); + // Record a free of large objects. + void RecordFreeLOS(const ObjectBytePair& freed); protected: // Run all of the GC phases. @@ -141,40 +182,17 @@ class GarbageCollector { // Revoke all the thread-local buffers. virtual void RevokeAllThreadLocalBuffers() = 0; - // Record that you have freed some objects or large objects, calls Heap::RecordFree. - // TODO: These are not thread safe, add a lock if we get parallel sweeping. 
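// Sketch (hypothetical caller) of the per-iteration accounting introduced above: sweeps now
// hand back an ObjectBytePair and the collector records it into the shared Iteration
// instead of per-collector freed_* counters.
void RecordSweepResultExample(GarbageCollector* collector) {
  ObjectBytePair freed(/*num_objects=*/128, /*num_bytes=*/4096);
  freed.Add(ObjectBytePair(2, -64));  // bytes may be negative when objects are promoted to a
                                      // space with a larger allocation size
  collector->RecordFree(freed);       // adds to Iteration::freed_ and calls Heap::RecordFree
}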
- void RecordFree(uint64_t freed_objects, int64_t freed_bytes); - void RecordFreeLargeObjects(uint64_t freed_objects, int64_t freed_bytes); - static constexpr size_t kPauseBucketSize = 500; static constexpr size_t kPauseBucketCount = 32; Heap* const heap_; - std::string name_; - - GcCause gc_cause_; - bool clear_soft_references_; - - uint64_t duration_ns_; - TimingLogger timings_; - // Cumulative statistics. Histogram<uint64_t> pause_histogram_; uint64_t total_time_ns_; uint64_t total_freed_objects_; int64_t total_freed_bytes_; - - // Single GC statitstics, freed bytes are signed since the GC can free negative bytes if it - // promotes objects to a space which has a larger allocation size. - int64_t freed_bytes_; - int64_t freed_large_object_bytes_; - uint64_t freed_objects_; - uint64_t freed_large_objects_; - CumulativeLogger cumulative_timings_; - - std::vector<uint64_t> pause_times_; }; } // namespace collector diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 595dc8f38c..40448524c6 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -49,7 +49,6 @@ #include "thread-inl.h" #include "thread_list.h" -using ::art::mirror::Class; using ::art::mirror::Object; namespace art { @@ -57,7 +56,7 @@ namespace gc { namespace collector { void MarkCompact::BindBitmaps() { - timings_.StartSplit("BindBitmaps"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); // Mark all of the spaces we never collect as immune. for (const auto& space : GetHeap()->GetContinuousSpaces()) { @@ -66,7 +65,6 @@ void MarkCompact::BindBitmaps() { CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; } } - timings_.EndSplit(); } MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix) @@ -120,7 +118,7 @@ class CalculateObjectForwardingAddressVisitor { }; void MarkCompact::CalculateObjectForwardingAddresses() { - timings_.NewSplit(__FUNCTION__); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // The bump pointer in the space where the next forwarding address will be. bump_pointer_ = reinterpret_cast<byte*>(space_->Begin()); // Visit all the marked objects in the bitmap. @@ -131,7 +129,7 @@ void MarkCompact::CalculateObjectForwardingAddresses() { } void MarkCompact::InitializePhase() { - TimingLogger::ScopedSplit split("InitializePhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); immune_region_.Reset(); @@ -143,11 +141,10 @@ void MarkCompact::InitializePhase() { } void MarkCompact::ProcessReferences(Thread* self) { - TimingLogger::ScopedSplit split("ProcessReferences", &timings_); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); heap_->GetReferenceProcessor()->ProcessReferences( - false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback, - &ProcessMarkStackCallback, this); + false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), + &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this); } class BitmapSetSlowPathVisitor { @@ -187,6 +184,7 @@ inline void MarkCompact::MarkObject(mirror::Object* obj) { } void MarkCompact::MarkingPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Thread* self = Thread::Current(); // Bitmap which describes which objects we have to move. 
objects_before_forwarding_.reset(accounting::ContinuousSpaceBitmap::Create( @@ -195,21 +193,22 @@ void MarkCompact::MarkingPhase() { objects_with_lockword_.reset(accounting::ContinuousSpaceBitmap::Create( "objects with lock words", space_->Begin(), space_->Size())); CHECK(Locks::mutator_lock_->IsExclusiveHeld(self)); - TimingLogger::ScopedSplit split("MarkingPhase", &timings_); // Assume the cleared space is already empty. BindBitmaps(); + t.NewTiming("ProcessCards"); // Process dirty cards and add dirty cards to mod-union tables. - heap_->ProcessCards(timings_, false); + heap_->ProcessCards(GetTimings(), false); // Clear the whole card table since we can not Get any additional dirty cards during the // paused GC. This saves memory but only works for pause the world collectors. - timings_.NewSplit("ClearCardTable"); + t.NewTiming("ClearCardTable"); heap_->GetCardTable()->ClearCardTable(); // Need to do this before the checkpoint since we don't want any threads to add references to // the live stack during the recursive mark. - timings_.NewSplit("SwapStacks"); if (kUseThreadLocalAllocationStack) { + t.NewTiming("RevokeAllThreadLocalAllocationStacks"); heap_->RevokeAllThreadLocalAllocationStacks(self); } + t.NewTiming("SwapStacks"); heap_->SwapStacks(self); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); @@ -227,24 +226,22 @@ void MarkCompact::MarkingPhase() { // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); - timings_.StartSplit("PreSweepingGcVerification"); // Disabled due to an issue where we have objects in the bump pointer space which reference dead // objects. // heap_->PreSweepingGcVerification(this); - timings_.EndSplit(); } void MarkCompact::UpdateAndMarkModUnion() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); for (auto& space : heap_->GetContinuousSpaces()) { // If the space is immune then we need to mark the references to other spaces. if (immune_region_.ContainsSpace(space)) { accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); if (table != nullptr) { // TODO: Improve naming. - TimingLogger::ScopedSplit split( + TimingLogger::ScopedTiming t( space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" : - "UpdateAndMarkImageModUnionTable", - &timings_); + "UpdateAndMarkImageModUnionTable", GetTimings()); table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this); } } @@ -252,27 +249,28 @@ void MarkCompact::UpdateAndMarkModUnion() { } void MarkCompact::MarkReachableObjects() { - timings_.StartSplit("MarkStackAsLive"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); accounting::ObjectStack* live_stack = heap_->GetLiveStack(); - heap_->MarkAllocStackAsLive(live_stack); + { + TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings()); + heap_->MarkAllocStackAsLive(live_stack); + } live_stack->Reset(); // Recursively process the mark stack. ProcessMarkStack(); } void MarkCompact::ReclaimPhase() { - TimingLogger::ScopedSplit split("ReclaimPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); // Reclaim unmarked objects. Sweep(false); // Swap the live and mark bitmaps for each space which we modified space. This is an // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound // bitmaps. 
- timings_.StartSplit("SwapBitmapsAndUnBindBitmaps"); SwapBitmaps(); GetHeap()->UnBindBitmaps(); // Unbind the live and mark bitmaps. Compact(); - timings_.EndSplit(); } void MarkCompact::ResizeMarkStack(size_t new_size) { @@ -340,7 +338,7 @@ class UpdateObjectReferencesVisitor { }; void MarkCompact::UpdateReferences() { - timings_.NewSplit(__FUNCTION__); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime* runtime = Runtime::Current(); // Update roots. runtime->VisitRoots(UpdateRootCallback, this); @@ -350,10 +348,10 @@ void MarkCompact::UpdateReferences() { accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); if (table != nullptr) { // TODO: Improve naming. - TimingLogger::ScopedSplit split( + TimingLogger::ScopedTiming t( space->IsZygoteSpace() ? "UpdateZygoteModUnionTableReferences" : "UpdateImageModUnionTableReferences", - &timings_); + GetTimings()); table->UpdateAndMarkReferences(&UpdateHeapReferenceCallback, this); } else { // No mod union table, so we need to scan the space using bitmap visit. @@ -381,7 +379,7 @@ void MarkCompact::UpdateReferences() { } void MarkCompact::Compact() { - timings_.NewSplit(__FUNCTION__); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); CalculateObjectForwardingAddresses(); UpdateReferences(); MoveObjects(); @@ -389,9 +387,9 @@ void MarkCompact::Compact() { int64_t objects_freed = space_->GetObjectsAllocated() - live_objects_in_space_; int64_t bytes_freed = reinterpret_cast<int64_t>(space_->End()) - reinterpret_cast<int64_t>(bump_pointer_); - timings_.NewSplit("RecordFree"); + t.NewTiming("RecordFree"); space_->RecordFree(objects_freed, bytes_freed); - RecordFree(objects_freed, bytes_freed); + RecordFree(ObjectBytePair(objects_freed, bytes_freed)); space_->SetEnd(bump_pointer_); // Need to zero out the memory we freed. TODO: Use madvise for pages. memset(bump_pointer_, 0, bytes_freed); @@ -399,7 +397,7 @@ void MarkCompact::Compact() { // Marks all objects in the root set. void MarkCompact::MarkRoots() { - timings_.NewSplit("MarkRoots"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime::Current()->VisitRoots(MarkRootCallback, this); } @@ -483,9 +481,8 @@ bool MarkCompact::HeapReferenceMarkedCallback(mirror::HeapReference<mirror::Obje } void MarkCompact::SweepSystemWeaks() { - timings_.StartSplit("SweepSystemWeaks"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this); - timings_.EndSplit(); } bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const { @@ -523,7 +520,7 @@ void MarkCompact::MoveObject(mirror::Object* obj, size_t len) { } void MarkCompact::MoveObjects() { - timings_.NewSplit(__FUNCTION__); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Move the objects in the before forwarding bitmap. MoveObjectVisitor visitor(this); objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()), @@ -533,31 +530,25 @@ void MarkCompact::MoveObjects() { } void MarkCompact::Sweep(bool swap_bitmaps) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); DCHECK(mark_stack_->IsEmpty()); - TimingLogger::ScopedSplit split("Sweep", &timings_); for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); if (!ShouldSweepSpace(alloc_space)) { continue; } - TimingLogger::ScopedSplit split( - alloc_space->IsZygoteSpace() ? 
"SweepZygoteSpace" : "SweepAllocSpace", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFree(freed_objects, freed_bytes); + TimingLogger::ScopedTiming t( + alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings()); + RecordFree(alloc_space->Sweep(swap_bitmaps)); } } SweepLargeObjects(swap_bitmaps); } void MarkCompact::SweepLargeObjects(bool swap_bitmaps) { - TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFreeLargeObjects(freed_objects, freed_bytes); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); } // Process the "referent" field in a java.lang.ref.Reference. If the referent has not yet been @@ -596,13 +587,12 @@ void MarkCompact::ScanObject(Object* obj) { // Scan anything that's on the mark stack. void MarkCompact::ProcessMarkStack() { - timings_.StartSplit("ProcessMarkStack"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); while (!mark_stack_->IsEmpty()) { Object* obj = mark_stack_->PopBack(); DCHECK(obj != nullptr); ScanObject(obj); } - timings_.EndSplit(); } void MarkCompact::SetSpace(space::BumpPointerSpace* space) { @@ -611,7 +601,7 @@ void MarkCompact::SetSpace(space::BumpPointerSpace* space) { } void MarkCompact::FinishPhase() { - TimingLogger::ScopedSplit split("FinishPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); space_ = nullptr; CHECK(mark_stack_->IsEmpty()); mark_stack_->Reset(); @@ -624,9 +614,8 @@ void MarkCompact::FinishPhase() { } void MarkCompact::RevokeAllThreadLocalBuffers() { - timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); GetHeap()->RevokeAllThreadLocalBuffers(); - timings_.EndSplit(); } } // namespace collector diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index fbb349eea1..7e97b3b16b 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -81,7 +81,7 @@ static constexpr bool kVerifyRootsMarked = kIsDebugBuild; static constexpr bool kRevokeRosAllocThreadLocalBuffersAtCheckpoint = true; void MarkSweep::BindBitmaps() { - timings_.StartSplit("BindBitmaps"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); // Mark all of the spaces we never collect as immune. 
for (const auto& space : GetHeap()->GetContinuousSpaces()) { @@ -89,7 +89,6 @@ void MarkSweep::BindBitmaps() { CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; } } - timings_.EndSplit(); } MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix) @@ -110,7 +109,7 @@ MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_pre } void MarkSweep::InitializePhase() { - TimingLogger::ScopedSplit split("InitializePhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); immune_region_.Reset(); @@ -132,9 +131,9 @@ void MarkSweep::InitializePhase() { ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); mark_bitmap_ = heap_->GetMarkBitmap(); } - if (!clear_soft_references_) { + if (!GetCurrentIteration()->GetClearSoftReferences()) { // Always clear soft references if a non-sticky collection. - clear_soft_references_ = GetGcType() != collector::kGcTypeSticky; + GetCurrentIteration()->SetClearSoftReferences(GetGcType() != collector::kGcTypeSticky); } } @@ -170,15 +169,14 @@ void MarkSweep::RunPhases() { } void MarkSweep::ProcessReferences(Thread* self) { - TimingLogger::ScopedSplit split("ProcessReferences", &timings_); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - true, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, &MarkObjectCallback, - &ProcessMarkStackCallback, this); + true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), + &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this); } void MarkSweep::PausePhase() { - TimingLogger::ScopedSplit split("(Paused)PausePhase", &timings_); + TimingLogger::ScopedTiming t("(Paused)PausePhase", GetTimings()); Thread* self = Thread::Current(); Locks::mutator_lock_->AssertExclusiveHeld(self); if (IsConcurrent()) { @@ -190,7 +188,7 @@ void MarkSweep::PausePhase() { RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty); } { - TimingLogger::ScopedSplit split("SwapStacks", &timings_); + TimingLogger::ScopedTiming t2("SwapStacks", GetTimings()); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); heap_->SwapStacks(self); live_stack_freeze_size_ = heap_->GetLiveStack()->Size(); @@ -198,9 +196,7 @@ void MarkSweep::PausePhase() { // stacks and don't want anybody to allocate into the live stack. RevokeAllThreadLocalAllocationStacks(self); } - timings_.StartSplit("PreSweepingGcVerification"); heap_->PreSweepingGcVerification(this); - timings_.EndSplit(); // Disallow new system weaks to prevent a race which occurs when someone adds a new system // weak before we sweep them. Since this new system weak may not be marked, the GC may // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong @@ -214,10 +210,11 @@ void MarkSweep::PausePhase() { void MarkSweep::PreCleanCards() { // Don't do this for non concurrent GCs since they don't have any dirty cards. if (kPreCleanCards && IsConcurrent()) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Thread* self = Thread::Current(); CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self)); // Process dirty cards and add dirty cards to mod union tables, also ages cards. 
- heap_->ProcessCards(timings_, false); + heap_->ProcessCards(GetTimings(), false); // The checkpoint root marking is required to avoid a race condition which occurs if the // following happens during a reference write: // 1. mutator dirties the card (write barrier) @@ -243,22 +240,19 @@ void MarkSweep::PreCleanCards() { void MarkSweep::RevokeAllThreadLocalAllocationStacks(Thread* self) { if (kUseThreadLocalAllocationStack) { - timings_.NewSplit("RevokeAllThreadLocalAllocationStacks"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Locks::mutator_lock_->AssertExclusiveHeld(self); heap_->RevokeAllThreadLocalAllocationStacks(self); } } void MarkSweep::MarkingPhase() { - TimingLogger::ScopedSplit split("MarkingPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Thread* self = Thread::Current(); - BindBitmaps(); FindDefaultSpaceBitmap(); - // Process dirty cards and add dirty cards to mod union tables. - heap_->ProcessCards(timings_, false); - + heap_->ProcessCards(GetTimings(), false); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); MarkRoots(self); MarkReachableObjects(); @@ -271,7 +265,7 @@ void MarkSweep::UpdateAndMarkModUnion() { if (immune_region_.ContainsSpace(space)) { const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" : "UpdateAndMarkImageModUnionTable"; - TimingLogger::ScopedSplit split(name, &timings_); + TimingLogger::ScopedTiming t(name, GetTimings()); accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); CHECK(mod_union_table != nullptr); mod_union_table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this); @@ -286,7 +280,7 @@ void MarkSweep::MarkReachableObjects() { } void MarkSweep::ReclaimPhase() { - TimingLogger::ScopedSplit split("ReclaimPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Thread* self = Thread::Current(); // Process the references concurrently. ProcessReferences(self); @@ -294,25 +288,19 @@ void MarkSweep::ReclaimPhase() { Runtime::Current()->AllowNewSystemWeaks(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - // Reclaim unmarked objects. Sweep(false); - // Swap the live and mark bitmaps for each space which we modified space. This is an // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound // bitmaps. - timings_.StartSplit("SwapBitmaps"); SwapBitmaps(); - timings_.EndSplit(); - // Unbind the live and mark bitmaps. - TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_); GetHeap()->UnBindBitmaps(); } } void MarkSweep::FindDefaultSpaceBitmap() { - TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); for (const auto& space : GetHeap()->GetContinuousSpaces()) { accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap(); // We want to have the main space instead of non moving if possible. @@ -509,11 +497,10 @@ void MarkSweep::VerifyRoots() { } void MarkSweep::MarkRoots(Thread* self) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); if (Locks::mutator_lock_->IsExclusiveHeld(self)) { // If we exclusively hold the mutator lock, all threads must be suspended. 
- timings_.StartSplit("MarkRoots"); Runtime::Current()->VisitRoots(MarkRootCallback, this); - timings_.EndSplit(); RevokeAllThreadLocalAllocationStacks(self); } else { MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint); @@ -525,16 +512,14 @@ void MarkSweep::MarkRoots(Thread* self) { } void MarkSweep::MarkNonThreadRoots() { - timings_.StartSplit("MarkNonThreadRoots"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this); - timings_.EndSplit(); } void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) { - timings_.StartSplit("MarkConcurrentRoots"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Visit all runtime roots and clear dirty flags. Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags); - timings_.EndSplit(); } class ScanObjectVisitor { @@ -755,7 +740,8 @@ void MarkSweep::ScanGrayObjects(bool paused, byte minimum_age) { Thread* self = Thread::Current(); // Can't have a different split for each space since multiple spaces can have their cards being // scanned at the same time. - timings_.StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects"); + TimingLogger::ScopedTiming t(paused ? "(Paused)ScanGrayObjects" : __FUNCTION__, + GetTimings()); // Try to take some of the mark stack since we can pass this off to the worker tasks. Object** mark_stack_begin = mark_stack_->Begin(); Object** mark_stack_end = mark_stack_->End(); @@ -808,28 +794,28 @@ void MarkSweep::ScanGrayObjects(bool paused, byte minimum_age) { thread_pool->StartWorkers(self); thread_pool->Wait(self, true, true); thread_pool->StopWorkers(self); - timings_.EndSplit(); } else { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetMarkBitmap() != nullptr) { // Image spaces are handled properly since live == marked for them. + const char* name = nullptr; switch (space->GetGcRetentionPolicy()) { - case space::kGcRetentionPolicyNeverCollect: - timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" : - "ScanGrayImageSpaceObjects"); - break; - case space::kGcRetentionPolicyFullCollect: - timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" : - "ScanGrayZygoteSpaceObjects"); - break; - case space::kGcRetentionPolicyAlwaysCollect: - timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" : - "ScanGrayAllocSpaceObjects"); - break; - } + case space::kGcRetentionPolicyNeverCollect: + name = paused ? "(Paused)ScanGrayImageSpaceObjects" : "ScanGrayImageSpaceObjects"; + break; + case space::kGcRetentionPolicyFullCollect: + name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects"; + break; + case space::kGcRetentionPolicyAlwaysCollect: + name = paused ? 
"(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects"; + break; + default: + LOG(FATAL) << "Unreachable"; + } + TimingLogger::ScopedTiming t(name, GetTimings()); ScanObjectVisitor visitor(this); - card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age); - timings_.EndSplit(); + card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, + minimum_age); } } } @@ -839,9 +825,7 @@ class RecursiveMarkTask : public MarkStackTask<false> { public: RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end) - : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL), - bitmap_(bitmap), - begin_(begin), + : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL), bitmap_(bitmap), begin_(begin), end_(end) { } @@ -866,7 +850,7 @@ class RecursiveMarkTask : public MarkStackTask<false> { // Populates the mark stack based on the set of marked objects and // recursively marks until the mark stack is emptied. void MarkSweep::RecursiveMark() { - TimingLogger::ScopedSplit split("RecursiveMark", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // RecursiveMark will build the lists of known instances of the Reference classes. See // DelayReferenceReferent for details. if (kUseRecursiveMark) { @@ -933,25 +917,22 @@ void MarkSweep::RecursiveMarkDirtyObjects(bool paused, byte minimum_age) { } void MarkSweep::ReMarkRoots() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); - timings_.StartSplit("(Paused)ReMarkRoots"); Runtime::Current()->VisitRoots( MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots | kVisitRootFlagClearRootLog)); - timings_.EndSplit(); if (kVerifyRootsMarked) { - timings_.StartSplit("(Paused)VerifyRoots"); + TimingLogger::ScopedTiming t("(Paused)VerifyRoots", GetTimings()); Runtime::Current()->VisitRoots(VerifyRootMarked, this); - timings_.EndSplit(); } } void MarkSweep::SweepSystemWeaks(Thread* self) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - timings_.StartSplit("SweepSystemWeaks"); Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this); - timings_.EndSplit(); } mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) { @@ -972,6 +953,7 @@ void MarkSweep::VerifyIsLive(const Object* obj) { } void MarkSweep::VerifySystemWeaks() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Verify system weaks, uses a special object visitor which returns the input object. Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this); } @@ -1008,8 +990,8 @@ class CheckpointMarkThreadRoots : public Closure { void MarkSweep::MarkRootsCheckpoint(Thread* self, bool revoke_ros_alloc_thread_local_buffers_at_checkpoint) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); CheckpointMarkThreadRoots check_point(this, revoke_ros_alloc_thread_local_buffers_at_checkpoint); - timings_.StartSplit("MarkRootsCheckpoint"); ThreadList* thread_list = Runtime::Current()->GetThreadList(); // Request the check point is run on all threads returning a count of the threads that must // run through the barrier including self. 
@@ -1024,19 +1006,16 @@ void MarkSweep::MarkRootsCheckpoint(Thread* self, } Locks::mutator_lock_->SharedLock(self); Locks::heap_bitmap_lock_->ExclusiveLock(self); - timings_.EndSplit(); } void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) { - timings_.StartSplit("SweepArray"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Thread* self = Thread::Current(); mirror::Object** chunk_free_buffer = reinterpret_cast<mirror::Object**>( sweep_array_free_buffer_mem_map_->BaseBegin()); size_t chunk_free_pos = 0; - size_t freed_bytes = 0; - size_t freed_large_object_bytes = 0; - size_t freed_objects = 0; - size_t freed_large_objects = 0; + ObjectBytePair freed; + ObjectBytePair freed_los; // How many objects are left in the array, modified after each space is swept. Object** objects = allocations->Begin(); size_t count = allocations->Size(); @@ -1077,10 +1056,9 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma // if needed. if (!mark_bitmap->Test(obj)) { if (chunk_free_pos >= kSweepArrayChunkFreeSize) { - timings_.StartSplit("FreeList"); - freed_objects += chunk_free_pos; - freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer); - timings_.EndSplit(); + TimingLogger::ScopedTiming t("FreeList", GetTimings()); + freed.objects += chunk_free_pos; + freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer); chunk_free_pos = 0; } chunk_free_buffer[chunk_free_pos++] = obj; @@ -1090,10 +1068,9 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma } } if (chunk_free_pos > 0) { - timings_.StartSplit("FreeList"); - freed_objects += chunk_free_pos; - freed_bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer); - timings_.EndSplit(); + TimingLogger::ScopedTiming t("FreeList", GetTimings()); + freed.objects += chunk_free_pos; + freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer); chunk_free_pos = 0; } // All of the references which space contained are no longer in the allocation stack, update @@ -1114,58 +1091,47 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma continue; } if (!large_mark_objects->Test(obj)) { - ++freed_large_objects; - freed_large_object_bytes += large_object_space->Free(self, obj); + ++freed_los.objects; + freed_los.bytes += large_object_space->Free(self, obj); } } - timings_.EndSplit(); - - timings_.StartSplit("RecordFree"); - VLOG(heap) << "Freed " << freed_objects << "/" << count << " objects with size " - << PrettySize(freed_bytes); - RecordFree(freed_objects, freed_bytes); - RecordFreeLargeObjects(freed_large_objects, freed_large_object_bytes); - timings_.EndSplit(); - - timings_.StartSplit("ResetStack"); - allocations->Reset(); - timings_.EndSplit(); - + { + TimingLogger::ScopedTiming t("RecordFree", GetTimings()); + RecordFree(freed); + RecordFreeLOS(freed_los); + t.NewTiming("ResetStack"); + allocations->Reset(); + } sweep_array_free_buffer_mem_map_->MadviseDontNeedAndZero(); } void MarkSweep::Sweep(bool swap_bitmaps) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Ensure that nobody inserted items in the live stack after we swapped the stacks. CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size()); - // Mark everything allocated since the last as GC live so that we can sweep concurrently, - // knowing that new allocations won't be marked as live. 
- timings_.StartSplit("MarkStackAsLive"); - accounting::ObjectStack* live_stack = heap_->GetLiveStack(); - heap_->MarkAllocStackAsLive(live_stack); - live_stack->Reset(); - timings_.EndSplit(); - - DCHECK(mark_stack_->IsEmpty()); + { + TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings()); + // Mark everything allocated since the last as GC live so that we can sweep concurrently, + // knowing that new allocations won't be marked as live. + accounting::ObjectStack* live_stack = heap_->GetLiveStack(); + heap_->MarkAllocStackAsLive(live_stack); + live_stack->Reset(); + DCHECK(mark_stack_->IsEmpty()); + } for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); - TimingLogger::ScopedSplit split( - alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFree(freed_objects, freed_bytes); + TimingLogger::ScopedTiming split( + alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", GetTimings()); + RecordFree(alloc_space->Sweep(swap_bitmaps)); } } SweepLargeObjects(swap_bitmaps); } void MarkSweep::SweepLargeObjects(bool swap_bitmaps) { - TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFreeLargeObjects(freed_objects, freed_bytes); + TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings()); + RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); } // Process the "referent" field in a java.lang.ref.Reference. If the referent has not yet been @@ -1233,7 +1199,7 @@ void MarkSweep::ProcessMarkStackParallel(size_t thread_count) { // Scan anything that's on the mark stack. void MarkSweep::ProcessMarkStack(bool paused) { - timings_.StartSplit(paused ? "(Paused)ProcessMarkStack" : "ProcessMarkStack"); + TimingLogger::ScopedTiming t(paused ? "(Paused)ProcessMarkStack" : __FUNCTION__, GetTimings()); size_t thread_count = GetThreadCount(paused); if (kParallelProcessMarkStack && thread_count > 1 && mark_stack_->Size() >= kMinimumParallelMarkStackSize) { @@ -1266,7 +1232,6 @@ void MarkSweep::ProcessMarkStack(bool paused) { ScanObject(obj); } } - timings_.EndSplit(); } inline bool MarkSweep::IsMarked(const Object* object) const { @@ -1280,7 +1245,7 @@ inline bool MarkSweep::IsMarked(const Object* object) const { } void MarkSweep::FinishPhase() { - TimingLogger::ScopedSplit split("FinishPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); if (kCountScannedTypes) { VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed() << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed(); @@ -1317,9 +1282,8 @@ void MarkSweep::RevokeAllThreadLocalBuffers() { // not be in use. 
GetHeap()->AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked(); } else { - timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); GetHeap()->RevokeAllThreadLocalBuffers(); - timings_.EndSplit(); } } diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 54e77a7fe0..cabfe2176c 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -59,7 +59,7 @@ static constexpr size_t kBytesPromotedThreshold = 4 * MB; static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB; void SemiSpace::BindBitmaps() { - timings_.StartSplit("BindBitmaps"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_); // Mark all of the spaces we never collect as immune. for (const auto& space : GetHeap()->GetContinuousSpaces()) { @@ -83,7 +83,6 @@ void SemiSpace::BindBitmaps() { // We won't collect the large object space if a bump pointer space only collection. is_large_object_space_immune_ = true; } - timings_.EndSplit(); } SemiSpace::SemiSpace(Heap* heap, bool generational, const std::string& name_prefix) @@ -131,7 +130,7 @@ void SemiSpace::RunPhases() { } void SemiSpace::InitializePhase() { - TimingLogger::ScopedSplit split("InitializePhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); immune_region_.Reset(); @@ -151,14 +150,14 @@ void SemiSpace::InitializePhase() { } void SemiSpace::ProcessReferences(Thread* self) { - TimingLogger::ScopedSplit split("ProcessReferences", &timings_); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - false, &timings_, clear_soft_references_, &HeapReferenceMarkedCallback, - &MarkObjectCallback, &ProcessMarkStackCallback, this); + false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), + &HeapReferenceMarkedCallback, &MarkObjectCallback, &ProcessMarkStackCallback, this); } void SemiSpace::MarkingPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); CHECK(Locks::mutator_lock_->IsExclusiveHeld(self_)); if (kStoreStackTraces) { Locks::mutator_lock_->AssertExclusiveHeld(self_); @@ -176,8 +175,9 @@ void SemiSpace::MarkingPhase() { // to prevent fragmentation. RevokeAllThreadLocalBuffers(); if (generational_) { - if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc || - clear_soft_references_) { + if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit || + GetCurrentIteration()->GetGcCause() == kGcCauseForNativeAlloc || + GetCurrentIteration()->GetClearSoftReferences()) { // If an explicit, native allocation-triggered, or last attempt // collection, collect the whole heap. whole_heap_collection_ = true; @@ -191,21 +191,12 @@ void SemiSpace::MarkingPhase() { } } - if (!clear_soft_references_) { - if (!generational_) { - // If non-generational, always clear soft references. - clear_soft_references_ = true; - } else { - // If generational, clear soft references if a whole heap collection. - if (whole_heap_collection_) { - clear_soft_references_ = true; - } - } + if (!generational_ || whole_heap_collection_) { + // If non-generational, always clear soft references. + // If generational, clear soft references if a whole heap collection. 
+ GetCurrentIteration()->SetClearSoftReferences(true); } - Locks::mutator_lock_->AssertExclusiveHeld(self_); - - TimingLogger::ScopedSplit split("MarkingPhase", &timings_); if (generational_) { // If last_gc_to_space_end_ is out of the bounds of the from-space // (the to-space from last GC), then point it to the beginning of @@ -220,15 +211,16 @@ void SemiSpace::MarkingPhase() { // Assume the cleared space is already empty. BindBitmaps(); // Process dirty cards and add dirty cards to mod-union tables. - heap_->ProcessCards(timings_, kUseRememberedSet && generational_); + heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_); // Clear the whole card table since we can not Get any additional dirty cards during the // paused GC. This saves memory but only works for pause the world collectors. - timings_.NewSplit("ClearCardTable"); + t.NewTiming("ClearCardTable"); heap_->GetCardTable()->ClearCardTable(); // Need to do this before the checkpoint since we don't want any threads to add references to // the live stack during the recursive mark. - timings_.NewSplit("SwapStacks"); + t.NewTiming("SwapStacks"); if (kUseThreadLocalAllocationStack) { + TimingLogger::ScopedTiming t("RevokeAllThreadLocalAllocationStacks", GetTimings()); heap_->RevokeAllThreadLocalAllocationStacks(self_); } heap_->SwapStacks(self_); @@ -245,7 +237,6 @@ void SemiSpace::MarkingPhase() { ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_); SweepSystemWeaks(); } - timings_.NewSplit("RecordFree"); // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); @@ -257,14 +248,12 @@ void SemiSpace::MarkingPhase() { CHECK_LE(to_objects, from_objects); // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed // space. - RecordFree(from_objects - to_objects, from_bytes - to_bytes); + RecordFree(ObjectBytePair(from_objects - to_objects, from_bytes - to_bytes)); // Clear and protect the from space. from_space_->Clear(); VLOG(heap) << "Protecting from_space_: " << *from_space_; from_space_->GetMemMap()->Protect(kProtectFromSpace ? PROT_NONE : PROT_READ); - timings_.StartSplit("PreSweepingGcVerification"); heap_->PreSweepingGcVerification(this); - timings_.EndSplit(); if (swap_semi_spaces_) { heap_->SwapSemiSpaces(); } @@ -277,10 +266,10 @@ void SemiSpace::UpdateAndMarkModUnion() { accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); if (table != nullptr) { // TODO: Improve naming. - TimingLogger::ScopedSplit split( + TimingLogger::ScopedTiming t( space->IsZygoteSpace() ? 
"UpdateAndMarkZygoteModUnionTable" : "UpdateAndMarkImageModUnionTable", - &timings_); + GetTimings()); table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this); } else if (heap_->FindRememberedSetFromSpace(space) != nullptr) { DCHECK(kUseRememberedSet); @@ -359,12 +348,14 @@ class SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor { }; void SemiSpace::MarkReachableObjects() { - timings_.StartSplit("MarkStackAsLive"); - accounting::ObjectStack* live_stack = heap_->GetLiveStack(); - heap_->MarkAllocStackAsLive(live_stack); - live_stack->Reset(); - - timings_.NewSplit("UpdateAndMarkRememberedSets"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + { + TimingLogger::ScopedTiming t2("MarkStackAsLive", GetTimings()); + accounting::ObjectStack* live_stack = heap_->GetLiveStack(); + heap_->MarkAllocStackAsLive(live_stack); + live_stack->Reset(); + } + t.NewTiming("UpdateAndMarkRememberedSets"); for (auto& space : heap_->GetContinuousSpaces()) { // If the space is immune and has no mod union table (the // non-moving space when the bump pointer space only collection is @@ -403,7 +394,7 @@ void SemiSpace::MarkReachableObjects() { } if (is_large_object_space_immune_) { - timings_.NewSplit("VisitLargeObjects"); + TimingLogger::ScopedTiming t("VisitLargeObjects", GetTimings()); DCHECK(generational_ && !whole_heap_collection_); // Delay copying the live set to the marked set until here from // BindBitmaps() as the large objects on the allocation stack may @@ -421,31 +412,24 @@ void SemiSpace::MarkReachableObjects() { reinterpret_cast<uintptr_t>(large_object_space->End()), visitor); } - timings_.EndSplit(); // Recursively process the mark stack. ProcessMarkStack(); } void SemiSpace::ReclaimPhase() { - TimingLogger::ScopedSplit split("ReclaimPhase", &timings_); - { - WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_); - // Reclaim unmarked objects. - Sweep(false); - // Swap the live and mark bitmaps for each space which we modified space. This is an - // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound - // bitmaps. - timings_.StartSplit("SwapBitmaps"); - SwapBitmaps(); - timings_.EndSplit(); - // Unbind the live and mark bitmaps. - TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_); - GetHeap()->UnBindBitmaps(); - } + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_); + // Reclaim unmarked objects. + Sweep(false); + // Swap the live and mark bitmaps for each space which we modified space. This is an + // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound + // bitmaps. + SwapBitmaps(); + // Unbind the live and mark bitmaps. + GetHeap()->UnBindBitmaps(); if (saved_bytes_ > 0) { VLOG(heap) << "Avoided dirtying " << PrettySize(saved_bytes_); } - if (generational_) { // Record the end (top) of the to space so we can distinguish // between objects that were allocated since the last GC and the @@ -634,8 +618,7 @@ void SemiSpace::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id* // Marks all objects in the root set. void SemiSpace::MarkRoots() { - timings_.NewSplit("MarkRoots"); - // TODO: Visit up image roots as well? 
+ TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime::Current()->VisitRoots(MarkRootCallback, this); } @@ -660,9 +643,8 @@ mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* objec } void SemiSpace::SweepSystemWeaks() { - timings_.StartSplit("SweepSystemWeaks"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this); - timings_.EndSplit(); } bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const { @@ -670,20 +652,17 @@ bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const { } void SemiSpace::Sweep(bool swap_bitmaps) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); DCHECK(mark_stack_->IsEmpty()); - TimingLogger::ScopedSplit split("Sweep", &timings_); for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); if (!ShouldSweepSpace(alloc_space)) { continue; } - TimingLogger::ScopedSplit split( - alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFree(freed_objects, freed_bytes); + TimingLogger::ScopedTiming split( + alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings()); + RecordFree(alloc_space->Sweep(swap_bitmaps)); } } if (!is_large_object_space_immune_) { @@ -693,11 +672,8 @@ void SemiSpace::Sweep(bool swap_bitmaps) { void SemiSpace::SweepLargeObjects(bool swap_bitmaps) { DCHECK(!is_large_object_space_immune_); - TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_); - size_t freed_objects = 0; - size_t freed_bytes = 0; - heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps, &freed_objects, &freed_bytes); - RecordFreeLargeObjects(freed_objects, freed_bytes); + TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings()); + RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); } // Process the "referent" field in a java.lang.ref.Reference. If the referent has not yet been @@ -737,6 +713,7 @@ void SemiSpace::ScanObject(Object* obj) { // Scan anything that's on the mark stack. void SemiSpace::ProcessMarkStack() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); space::MallocSpace* promo_dest_space = nullptr; accounting::ContinuousSpaceBitmap* live_bitmap = nullptr; if (generational_ && !whole_heap_collection_) { @@ -750,7 +727,6 @@ void SemiSpace::ProcessMarkStack() { DCHECK(mark_bitmap != nullptr); DCHECK_EQ(live_bitmap, mark_bitmap); } - timings_.StartSplit("ProcessMarkStack"); while (!mark_stack_->IsEmpty()) { Object* obj = mark_stack_->PopBack(); if (generational_ && !whole_heap_collection_ && promo_dest_space->HasAddress(obj)) { @@ -761,7 +737,6 @@ void SemiSpace::ProcessMarkStack() { } ScanObject(obj); } - timings_.EndSplit(); } inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const @@ -792,7 +767,7 @@ void SemiSpace::SetFromSpace(space::ContinuousMemMapAllocSpace* from_space) { } void SemiSpace::FinishPhase() { - TimingLogger::ScopedSplit split("FinishPhase", &timings_); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Null the "to" and "from" spaces since compacting from one to the other isn't valid until // further action is done by the heap. 
to_space_ = nullptr; @@ -833,9 +808,8 @@ void SemiSpace::FinishPhase() { } void SemiSpace::RevokeAllThreadLocalBuffers() { - timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers"); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); GetHeap()->RevokeAllThreadLocalBuffers(); - timings_.EndSplit(); } } // namespace collector diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 1c94d6f224..696728ba9a 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -866,7 +866,10 @@ void Heap::DoPendingTransitionOrTrim() { // about pauses. Runtime* runtime = Runtime::Current(); runtime->GetThreadList()->SuspendAll(); - runtime->GetMonitorList()->DeflateMonitors(); + uint64_t start_time = NanoTime(); + size_t count = runtime->GetMonitorList()->DeflateMonitors(); + VLOG(heap) << "Deflating " << count << " monitors took " + << PrettyDuration(NanoTime() - start_time); runtime->GetThreadList()->ResumeAll(); // Do a heap trim if it is needed. Trim(); @@ -1580,6 +1583,7 @@ class ZygoteCompactingCollector FINAL : public collector::SemiSpace { }; void Heap::UnBindBitmaps() { + TimingLogger::ScopedTiming t("UnBindBitmaps", GetCurrentGcIteration()->GetTimings()); for (const auto& space : GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); @@ -1643,8 +1647,8 @@ void Heap::PreZygoteFork() { if (temp_space_ != nullptr) { CHECK(temp_space_->IsEmpty()); } - total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects(); - total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes(); + total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects(); + total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes(); // Update the end and write out image. non_moving_space_->SetEnd(target_space.End()); non_moving_space_->SetLimit(target_space.Limit()); @@ -1838,17 +1842,17 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus << "Could not find garbage collector with collector_type=" << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type; collector->Run(gc_cause, clear_soft_references || runtime->IsZygote()); - total_objects_freed_ever_ += collector->GetFreedObjects(); - total_bytes_freed_ever_ += collector->GetFreedBytes(); + total_objects_freed_ever_ += GetCurrentGcIteration()->GetFreedObjects(); + total_bytes_freed_ever_ += GetCurrentGcIteration()->GetFreedBytes(); RequestHeapTrim(); // Enqueue cleared references. reference_processor_.EnqueueClearedReferences(self); // Grow the heap so that we know when to perform the next GC. GrowForUtilization(collector); - const size_t duration = collector->GetDurationNs(); - const std::vector<uint64_t>& pause_times = collector->GetPauseTimes(); + const size_t duration = GetCurrentGcIteration()->GetDurationNs(); + const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes(); // Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC - // (mutator time blocked >= long_pause_log_threshold_). + // (mutator time blocked >= long_pause_log_threshold_). bool log_gc = gc_cause == kGcCauseExplicit; if (!log_gc && CareAboutPauseTimes()) { // GC for alloc pauses the allocating thread, so consider it as a pause. @@ -1868,14 +1872,14 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus << ((i != pause_times.size() - 1) ? 
"," : ""); } LOG(INFO) << gc_cause << " " << collector->GetName() - << " GC freed " << collector->GetFreedObjects() << "(" - << PrettySize(collector->GetFreedBytes()) << ") AllocSpace objects, " - << collector->GetFreedLargeObjects() << "(" - << PrettySize(collector->GetFreedLargeObjectBytes()) << ") LOS objects, " + << " GC freed " << current_gc_iteration_.GetFreedObjects() << "(" + << PrettySize(current_gc_iteration_.GetFreedBytes()) << ") AllocSpace objects, " + << current_gc_iteration_.GetFreedLargeObjects() << "(" + << PrettySize(current_gc_iteration_.GetFreedLargeObjectBytes()) << ") LOS objects, " << percent_free << "% free, " << PrettySize(current_heap_size) << "/" << PrettySize(total_memory) << ", " << "paused " << pause_string.str() << " total " << PrettyDuration((duration / 1000) * 1000); - VLOG(heap) << ConstDumpable<TimingLogger>(collector->GetTimings()); + VLOG(heap) << ConstDumpable<TimingLogger>(*current_gc_iteration_.GetTimings()); } FinishGC(self, gc_type); // Inform DDMS that a GC completed. @@ -2313,7 +2317,8 @@ accounting::RememberedSet* Heap::FindRememberedSetFromSpace(space::Space* space) return it->second; } -void Heap::ProcessCards(TimingLogger& timings, bool use_rem_sets) { +void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets) { + TimingLogger::ScopedTiming t(__FUNCTION__, timings); // Clear cards and keep track of cards cleared in the mod-union table. for (const auto& space : continuous_spaces_) { accounting::ModUnionTable* table = FindModUnionTableFromSpace(space); @@ -2321,15 +2326,15 @@ void Heap::ProcessCards(TimingLogger& timings, bool use_rem_sets) { if (table != nullptr) { const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" : "ImageModUnionClearCards"; - TimingLogger::ScopedSplit split(name, &timings); + TimingLogger::ScopedTiming t(name, timings); table->ClearCards(); } else if (use_rem_sets && rem_set != nullptr) { DCHECK(collector::SemiSpace::kUseRememberedSet && collector_type_ == kCollectorTypeGSS) << static_cast<int>(collector_type_); - TimingLogger::ScopedSplit split("AllocSpaceRemSetClearCards", &timings); + TimingLogger::ScopedTiming t("AllocSpaceRemSetClearCards", timings); rem_set->ClearCards(); } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) { - TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings); + TimingLogger::ScopedTiming t("AllocSpaceClearCards", timings); // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards // were dirty before the GC started. // TODO: Need to use atomic for the case where aged(cleaning thread) -> dirty(other thread) @@ -2337,7 +2342,8 @@ void Heap::ProcessCards(TimingLogger& timings, bool use_rem_sets) { // The races are we either end up with: Aged card, unaged card. Since we have the checkpoint // roots and then we scan / update mod union tables after. We will always scan either card. // If we end up with the non aged card, we scan it it in the pause. 
- card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor()); + card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), + VoidFunctor()); } } } @@ -2347,9 +2353,10 @@ static void IdentityMarkHeapReferenceCallback(mirror::HeapReference<mirror::Obje void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) { Thread* const self = Thread::Current(); - TimingLogger* const timings = &gc->GetTimings(); + TimingLogger* const timings = current_gc_iteration_.GetTimings(); + TimingLogger::ScopedTiming t(__FUNCTION__, timings); if (verify_pre_gc_heap_) { - TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings); + TimingLogger::ScopedTiming t("(Paused)PreGcVerifyHeapReferences", timings); ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); size_t failures = VerifyHeapReferences(); if (failures > 0) { @@ -2359,7 +2366,7 @@ void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) { } // Check that all objects which reference things in the live stack are on dirty cards. if (verify_missing_card_marks_) { - TimingLogger::ScopedSplit split("PreGcVerifyMissingCardMarks", timings); + TimingLogger::ScopedTiming t("(Paused)PreGcVerifyMissingCardMarks", timings); ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); SwapStacks(self); // Sort the live stack so that we can quickly binary search it later. @@ -2369,7 +2376,7 @@ void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) { SwapStacks(self); } if (verify_mod_union_table_) { - TimingLogger::ScopedSplit split("PreGcVerifyModUnionTables", timings); + TimingLogger::ScopedTiming t("(Paused)PreGcVerifyModUnionTables", timings); ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_); for (const auto& table_pair : mod_union_tables_) { accounting::ModUnionTable* mod_union_table = table_pair.second; @@ -2389,17 +2396,18 @@ void Heap::PreGcVerification(collector::GarbageCollector* gc) { void Heap::PrePauseRosAllocVerification(collector::GarbageCollector* gc) { // TODO: Add a new runtime option for this? if (verify_pre_gc_rosalloc_) { - RosAllocVerification(&gc->GetTimings(), "PreGcRosAllocVerification"); + RosAllocVerification(current_gc_iteration_.GetTimings(), "PreGcRosAllocVerification"); } } void Heap::PreSweepingGcVerification(collector::GarbageCollector* gc) { Thread* const self = Thread::Current(); - TimingLogger* const timings = &gc->GetTimings(); + TimingLogger* const timings = current_gc_iteration_.GetTimings(); + TimingLogger::ScopedTiming t(__FUNCTION__, timings); // Called before sweeping occurs since we want to make sure we are not going so reclaim any // reachable objects. if (verify_pre_sweeping_heap_) { - TimingLogger::ScopedSplit split("PostSweepingVerifyHeapReferences", timings); + TimingLogger::ScopedTiming t("(Paused)PostSweepingVerifyHeapReferences", timings); CHECK_NE(self->GetState(), kRunnable); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); // Swapping bound bitmaps does nothing. @@ -2421,17 +2429,18 @@ void Heap::PreSweepingGcVerification(collector::GarbageCollector* gc) { void Heap::PostGcVerificationPaused(collector::GarbageCollector* gc) { // Only pause if we have to do some verification. 
Thread* const self = Thread::Current(); - TimingLogger* const timings = &gc->GetTimings(); + TimingLogger* const timings = GetCurrentGcIteration()->GetTimings(); + TimingLogger::ScopedTiming t(__FUNCTION__, timings); if (verify_system_weaks_) { ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_); collector::MarkSweep* mark_sweep = down_cast<collector::MarkSweep*>(gc); mark_sweep->VerifySystemWeaks(); } if (verify_post_gc_rosalloc_) { - RosAllocVerification(timings, "PostGcRosAllocVerification"); + RosAllocVerification(timings, "(Paused)PostGcRosAllocVerification"); } if (verify_post_gc_heap_) { - TimingLogger::ScopedSplit split("PostGcVerifyHeapReferences", timings); + TimingLogger::ScopedTiming t("(Paused)PostGcVerifyHeapReferences", timings); ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); size_t failures = VerifyHeapReferences(); if (failures > 0) { @@ -2449,7 +2458,7 @@ void Heap::PostGcVerification(collector::GarbageCollector* gc) { } void Heap::RosAllocVerification(TimingLogger* timings, const char* name) { - TimingLogger::ScopedSplit split(name, timings); + TimingLogger::ScopedTiming t(name, timings); for (const auto& space : continuous_spaces_) { if (space->IsRosAllocSpace()) { VLOG(heap) << name << " : " << space->GetName(); @@ -2575,9 +2584,9 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran) { // We also check that the bytes allocated aren't over the footprint limit in order to prevent a // pathological case where dead objects which aren't reclaimed by sticky could get accumulated // if the sticky GC throughput always remained >= the full/partial throughput. - if (collector_ran->GetEstimatedLastIterationThroughput() * kStickyGcThroughputAdjustment >= + if (current_gc_iteration_.GetEstimatedThroughput() * kStickyGcThroughputAdjustment >= non_sticky_collector->GetEstimatedMeanThroughput() && - non_sticky_collector->GetIterations() > 0 && + non_sticky_collector->NumberOfIterations() > 0 && bytes_allocated <= max_allowed_footprint_) { next_gc_type_ = collector::kGcTypeSticky; } else { @@ -2595,7 +2604,7 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran) { if (IsGcConcurrent()) { // Calculate when to perform the next ConcurrentGC. // Calculate the estimated GC duration. - const double gc_duration_seconds = NsToMs(collector_ran->GetDurationNs()) / 1000.0; + const double gc_duration_seconds = NsToMs(current_gc_iteration_.GetDurationNs()) / 1000.0; // Estimate how many remaining bytes we will have when we need to start the next GC. size_t remaining_bytes = allocation_rate_ * gc_duration_seconds; remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes); diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 368a20c5da..a34cd3871d 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -27,6 +27,7 @@ #include "gc/accounting/atomic_stack.h" #include "gc/accounting/card_table.h" #include "gc/gc_cause.h" +#include "gc/collector/garbage_collector.h" #include "gc/collector/gc_type.h" #include "gc/collector_type.h" #include "globals.h" @@ -317,6 +318,13 @@ class Heap { return discontinuous_spaces_; } + const collector::Iteration* GetCurrentGcIteration() const { + return ¤t_gc_iteration_; + } + collector::Iteration* GetCurrentGcIteration() { + return ¤t_gc_iteration_; + } + // Enable verification of object references when the runtime is sufficiently initialized. 
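// Illustrative sketch of the per-GC "Iteration" state the heap.cc/heap.h hunks start reading:
// the Heap owns one current_gc_iteration_ and the running collector records its freed counts,
// pause times and duration into it, so callers stop querying the collector object directly.
// The members below are inferred from the accessors used in the diff; this is not the real class.
#include <cstdint>
#include <vector>

class GcIterationSketch {
 public:
  uint64_t GetFreedObjects() const { return freed_objects_; }
  int64_t GetFreedBytes() const { return freed_bytes_; }
  uint64_t GetDurationNs() const { return duration_ns_; }
  const std::vector<uint64_t>& GetPauseTimes() const { return pause_times_; }
  bool GetClearSoftReferences() const { return clear_soft_references_; }
  void SetClearSoftReferences(bool clear) { clear_soft_references_ = clear; }

  // Called by the collector as the iteration runs.
  void RecordFree(uint64_t objects, int64_t bytes) {
    freed_objects_ += objects;
    freed_bytes_ += bytes;
  }
  void AddPause(uint64_t pause_ns) { pause_times_.push_back(pause_ns); }
  void SetDurationNs(uint64_t ns) { duration_ns_ = ns; }

 private:
  uint64_t freed_objects_ = 0;
  int64_t freed_bytes_ = 0;            // Large-object frees would be tracked separately.
  uint64_t duration_ns_ = 0;
  std::vector<uint64_t> pause_times_;  // One entry per stop-the-world pause in this iteration.
  bool clear_soft_references_ = false;
};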
void EnableObjectValidation() { verify_object_mode_ = kVerifyObjectSupport; @@ -690,7 +698,7 @@ class Heap { void SwapStacks(Thread* self); // Clear cards and update the mod union table. - void ProcessCards(TimingLogger& timings, bool use_rem_sets); + void ProcessCards(TimingLogger* timings, bool use_rem_sets); // Signal the heap trim daemon that there is something to do, either a heap transition or heap // trim. @@ -849,6 +857,9 @@ class Heap { // Data structure GC overhead. Atomic<size_t> gc_memory_overhead_; + // Info related to the current or previous GC iteration. + collector::Iteration current_gc_iteration_; + // Heap verification flags. const bool verify_missing_card_marks_; const bool verify_system_weaks_; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 292781e6e7..e52bc1fd1e 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -110,6 +110,7 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing MarkObjectCallback* mark_object_callback, ProcessMarkStackCallback* process_mark_stack_callback, void* arg) { + TimingLogger::ScopedTiming t(concurrent ? __FUNCTION__ : "(Paused)ProcessReferences", timings); Thread* self = Thread::Current(); { MutexLock mu(self, lock_); @@ -118,10 +119,9 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing process_references_args_.arg_ = arg; CHECK_EQ(slow_path_enabled_, concurrent) << "Slow path must be enabled iff concurrent"; } - timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences"); // Unless required to clear soft references with white references, preserve some white referents. if (!clear_soft_references) { - TimingLogger::ScopedSplit split(concurrent ? "ForwardSoftReferences" : + TimingLogger::ScopedTiming split(concurrent ? "ForwardSoftReferences" : "(Paused)ForwardSoftReferences", timings); if (concurrent) { StartPreservingReferences(self); @@ -138,7 +138,7 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg); { - TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" : + TimingLogger::ScopedTiming t(concurrent ? "EnqueueFinalizerReferences" : "(Paused)EnqueueFinalizerReferences", timings); if (concurrent) { StartPreservingReferences(self); @@ -173,7 +173,6 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing DisableSlowPath(self); } } - timings->EndSplit(); } // Process the "referent" field in a java.lang.ref.Reference. 
If the referent has not yet been diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index 54a63f065d..abae8ff346 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -411,28 +411,24 @@ void LargeObjectSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, voi bitmap->Clear(ptrs[i]); } } - context->freed_objects += num_ptrs; - context->freed_bytes += space->FreeList(self, num_ptrs, ptrs); + context->freed.objects += num_ptrs; + context->freed.bytes += space->FreeList(self, num_ptrs, ptrs); } -void LargeObjectSpace::Sweep(bool swap_bitmaps, size_t* out_freed_objects, - size_t* out_freed_bytes) { +collector::ObjectBytePair LargeObjectSpace::Sweep(bool swap_bitmaps) { if (Begin() >= End()) { - return; + return collector::ObjectBytePair(0, 0); } accounting::LargeObjectBitmap* live_bitmap = GetLiveBitmap(); accounting::LargeObjectBitmap* mark_bitmap = GetMarkBitmap(); if (swap_bitmaps) { std::swap(live_bitmap, mark_bitmap); } - DCHECK(out_freed_objects != nullptr); - DCHECK(out_freed_bytes != nullptr); - SweepCallbackContext scc(swap_bitmaps, this); + AllocSpace::SweepCallbackContext scc(swap_bitmaps, this); accounting::LargeObjectBitmap::SweepWalk(*live_bitmap, *mark_bitmap, reinterpret_cast<uintptr_t>(Begin()), reinterpret_cast<uintptr_t>(End()), SweepCallback, &scc); - *out_freed_objects += scc.freed_objects; - *out_freed_bytes += scc.freed_bytes; + return scc.freed; } } // namespace space diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h index a84b43a8a1..01982d06ab 100644 --- a/runtime/gc/space/large_object_space.h +++ b/runtime/gc/space/large_object_space.h @@ -73,7 +73,7 @@ class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace { return this; } - void Sweep(bool swap_bitmaps, size_t* out_freed_objects, size_t* out_freed_bytes); + collector::ObjectBytePair Sweep(bool swap_bitmaps); virtual bool CanMoveObjects() const OVERRIDE { return false; diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc index 57ed0bd35c..4d74f3c246 100644 --- a/runtime/gc/space/malloc_space.cc +++ b/runtime/gc/space/malloc_space.cc @@ -242,8 +242,8 @@ void MallocSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* ar // Use a bulk free, that merges consecutive objects before freeing or free per object? // Documentation suggests better free performance with merging, but this may be at the expensive // of allocation. - context->freed_objects += num_ptrs; - context->freed_bytes += space->FreeList(self, num_ptrs, ptrs); + context->freed.objects += num_ptrs; + context->freed.bytes += space->FreeList(self, num_ptrs, ptrs); } } // namespace space diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc index 4e2841691e..bff28f6d19 100644 --- a/runtime/gc/space/space.cc +++ b/runtime/gc/space/space.cc @@ -81,14 +81,12 @@ DiscontinuousSpace::DiscontinuousSpace(const std::string& name, CHECK(mark_bitmap_.get() != nullptr); } -void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) { - DCHECK(freed_objects != nullptr); - DCHECK(freed_bytes != nullptr); +collector::ObjectBytePair ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps) { accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap(); accounting::ContinuousSpaceBitmap* mark_bitmap = GetMarkBitmap(); // If the bitmaps are bound then sweeping this space clearly won't do anything. 
if (live_bitmap == mark_bitmap) { - return; + return collector::ObjectBytePair(0, 0); } SweepCallbackContext scc(swap_bitmaps, this); if (swap_bitmaps) { @@ -98,8 +96,7 @@ void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, accounting::ContinuousSpaceBitmap::SweepWalk( *live_bitmap, *mark_bitmap, reinterpret_cast<uintptr_t>(Begin()), reinterpret_cast<uintptr_t>(End()), GetSweepCallback(), reinterpret_cast<void*>(&scc)); - *freed_objects += scc.freed_objects; - *freed_bytes += scc.freed_bytes; + return scc.freed; } // Returns the old mark bitmap. @@ -136,9 +133,8 @@ void ContinuousMemMapAllocSpace::SwapBitmaps() { mark_bitmap_->SetName(temp_name); } -Space::SweepCallbackContext::SweepCallbackContext(bool swap_bitmaps, space::Space* space) - : swap_bitmaps(swap_bitmaps), space(space), self(Thread::Current()), freed_objects(0), - freed_bytes(0) { +AllocSpace::SweepCallbackContext::SweepCallbackContext(bool swap_bitmaps, space::Space* space) + : swap_bitmaps(swap_bitmaps), space(space), self(Thread::Current()) { } } // namespace space diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index 8415fa18ad..8444a70b9c 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -23,6 +23,7 @@ #include "base/macros.h" #include "base/mutex.h" #include "gc/accounting/space_bitmap.h" +#include "gc/collector/garbage_collector.h" #include "globals.h" #include "image.h" #include "mem_map.h" @@ -172,16 +173,6 @@ class Space { std::string name_; protected: - struct SweepCallbackContext { - public: - SweepCallbackContext(bool swap_bitmaps, space::Space* space); - const bool swap_bitmaps; - space::Space* const space; - Thread* const self; - size_t freed_objects; - size_t freed_bytes; - }; - // When should objects within this space be reclaimed? Not constant as we vary it in the case // of Zygote forking. 
GcRetentionPolicy gc_retention_policy_; @@ -232,6 +223,14 @@ class AllocSpace { virtual void RevokeAllThreadLocalBuffers() = 0; protected: + struct SweepCallbackContext { + SweepCallbackContext(bool swap_bitmaps, space::Space* space); + const bool swap_bitmaps; + space::Space* const space; + Thread* const self; + collector::ObjectBytePair freed; + }; + AllocSpace() {} virtual ~AllocSpace() {} @@ -415,7 +414,7 @@ class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace { return mark_bitmap_.get(); } - void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes); + collector::ObjectBytePair Sweep(bool swap_bitmaps); virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0; protected: diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index 1477324ca0..325b089a48 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -294,14 +294,14 @@ struct JdwpState { ObjectId threadId) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void CleanupMatchList(JdwpEvent** match_list, - int match_count) + size_t match_count) EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void EventFinish(ExpandBuf* pReq); void FindMatchingEvents(JdwpEventKind eventKind, - ModBasket* basket, + const ModBasket& basket, JdwpEvent** match_list, - int* pMatchCount) + size_t* pMatchCount) EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void UnregisterEvent(JdwpEvent* pEvent) diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc index cb2c420dbb..86c84e8b0f 100644 --- a/runtime/jdwp/jdwp_event.cc +++ b/runtime/jdwp/jdwp_event.cc @@ -397,7 +397,7 @@ static JdwpEvent** AllocMatchList(size_t event_count) { * Run through the list and remove any entries with an expired "count" mod * from the event list, then free the match list. */ -void JdwpState::CleanupMatchList(JdwpEvent** match_list, int match_count) { +void JdwpState::CleanupMatchList(JdwpEvent** match_list, size_t match_count) { JdwpEvent** ppEvent = match_list; while (match_count--) { @@ -405,7 +405,8 @@ void JdwpState::CleanupMatchList(JdwpEvent** match_list, int match_count) { for (int i = 0; i < pEvent->modCount; i++) { if (pEvent->mods[i].modKind == MK_COUNT && pEvent->mods[i].count.count == 0) { - VLOG(jdwp) << "##### Removing expired event"; + VLOG(jdwp) << StringPrintf("##### Removing expired event (requestId=%#" PRIx32 ")", + pEvent->requestId); UnregisterEvent(pEvent); EventFree(pEvent); break; @@ -445,7 +446,7 @@ static bool PatternMatch(const char* pattern, const std::string& target) { * If we find a Count mod before rejecting an event, we decrement it. We * need to do this even if later mods cause us to ignore the event. 
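// Illustrative sketch of how the relocated SweepCallbackContext is used: the bitmap sweep-walk
// hands batches of dead pointers to a callback, which bulk-frees them and accumulates the result
// into context->freed (an ObjectBytePair) instead of two separate counters. The walk, thread
// argument and object size below are simplified stand-ins, not the real space API.
#include <cstddef>
#include <cstdint>

struct ObjSketch;  // Opaque heap object for the sketch.

struct SweepContextSketch {
  struct { uint64_t objects = 0; int64_t bytes = 0; } freed;
};

// Stand-in for Space::FreeList(): frees a batch and reports the bytes released.
static int64_t FreeListSketch(size_t num_ptrs, ObjSketch** /*ptrs*/) {
  return static_cast<int64_t>(num_ptrs) * 48;  // Assumed average object size for the sketch.
}

// Shape of MallocSpace/LargeObjectSpace::SweepCallback after the change.
static void SweepCallbackSketch(size_t num_ptrs, ObjSketch** ptrs, void* arg) {
  SweepContextSketch* context = static_cast<SweepContextSketch*>(arg);
  context->freed.objects += num_ptrs;
  context->freed.bytes += FreeListSketch(num_ptrs, ptrs);
}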
*/ -static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket) +static bool ModsMatch(JdwpEvent* pEvent, const ModBasket& basket) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { JdwpEventMod* pMod = pEvent->mods; @@ -462,53 +463,53 @@ static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket) CHECK(false); // should not be getting these break; case MK_THREAD_ONLY: - if (pMod->threadOnly.threadId != basket->threadId) { + if (pMod->threadOnly.threadId != basket.threadId) { return false; } break; case MK_CLASS_ONLY: - if (!Dbg::MatchType(basket->classId, pMod->classOnly.refTypeId)) { + if (!Dbg::MatchType(basket.classId, pMod->classOnly.refTypeId)) { return false; } break; case MK_CLASS_MATCH: - if (!PatternMatch(pMod->classMatch.classPattern, basket->className)) { + if (!PatternMatch(pMod->classMatch.classPattern, basket.className)) { return false; } break; case MK_CLASS_EXCLUDE: - if (PatternMatch(pMod->classMatch.classPattern, basket->className)) { + if (PatternMatch(pMod->classMatch.classPattern, basket.className)) { return false; } break; case MK_LOCATION_ONLY: - if (pMod->locationOnly.loc != *basket->pLoc) { + if (pMod->locationOnly.loc != *basket.pLoc) { return false; } break; case MK_EXCEPTION_ONLY: - if (pMod->exceptionOnly.refTypeId != 0 && !Dbg::MatchType(basket->excepClassId, pMod->exceptionOnly.refTypeId)) { + if (pMod->exceptionOnly.refTypeId != 0 && !Dbg::MatchType(basket.excepClassId, pMod->exceptionOnly.refTypeId)) { return false; } - if ((basket->caught && !pMod->exceptionOnly.caught) || (!basket->caught && !pMod->exceptionOnly.uncaught)) { + if ((basket.caught && !pMod->exceptionOnly.caught) || (!basket.caught && !pMod->exceptionOnly.uncaught)) { return false; } break; case MK_FIELD_ONLY: - if (pMod->fieldOnly.fieldId != basket->fieldId) { + if (pMod->fieldOnly.fieldId != basket.fieldId) { return false; } - if (!Dbg::MatchType(basket->fieldTypeID, pMod->fieldOnly.refTypeId)) { + if (!Dbg::MatchType(basket.fieldTypeID, pMod->fieldOnly.refTypeId)) { return false; } break; case MK_STEP: - if (pMod->step.threadId != basket->threadId) { + if (pMod->step.threadId != basket.threadId) { return false; } break; case MK_INSTANCE_ONLY: - if (pMod->instanceOnly.objectId != basket->thisPtr) { + if (pMod->instanceOnly.objectId != basket.thisPtr) { return false; } break; @@ -530,19 +531,16 @@ static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket) * DO NOT call this multiple times for the same eventKind, as Count mods are * decremented during the scan. 
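The ModsMatch hunk above converts every basket access from pointer to const reference. A small sketch of the pattern with illustrative types, not ART's:

    // Sketch: a matcher taking its read-only input by const reference rather than
    // by pointer, mirroring the ModsMatch/FindMatchingEvents signature change.
    #include <cstdint>
    #include <string>

    struct Basket {                    // Illustrative stand-in for JDWP's ModBasket.
      uint64_t thread_id = 0;
      std::string class_name;
    };

    // Before: bool Matches(uint64_t wanted_thread, Basket* basket) -- callers wrote
    // &basket, and nothing ruled out a null pointer or an unintended write.
    bool Matches(uint64_t wanted_thread, const Basket& basket) {
      return basket.thread_id == wanted_thread;   // Read-only access, never null.
    }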
*/ -void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, ModBasket* basket, - JdwpEvent** match_list, int* pMatchCount) { +void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, const ModBasket& basket, + JdwpEvent** match_list, size_t* pMatchCount) { /* start after the existing entries */ match_list += *pMatchCount; - JdwpEvent* pEvent = event_list_; - while (pEvent != NULL) { + for (JdwpEvent* pEvent = event_list_; pEvent != nullptr; pEvent = pEvent->next) { if (pEvent->eventKind == eventKind && ModsMatch(pEvent, basket)) { *match_list++ = pEvent; (*pMatchCount)++; } - - pEvent = pEvent->next; } } @@ -774,6 +772,22 @@ bool JdwpState::PostVMStart() { return true; } +static void LogMatchingEventsAndThread(JdwpEvent** match_list, size_t match_count, + const ModBasket& basket) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < match_count; ++i) { + JdwpEvent* pEvent = match_list[i]; + VLOG(jdwp) << "EVENT #" << i << ": " << pEvent->eventKind + << StringPrintf(" (requestId=%#" PRIx32 ")", pEvent->requestId); + } + std::string thread_name; + JdwpError error = Dbg::GetThreadName(basket.threadId, thread_name); + if (error != JDWP::ERR_NONE) { + thread_name = "<unknown>"; + } + VLOG(jdwp) << StringPrintf(" thread=%#" PRIx64, basket.threadId) << " " << thread_name; +} + /* * A location of interest has been reached. This handles: * Breakpoint @@ -829,39 +843,40 @@ bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, in return false; } - int match_count = 0; + size_t match_count = 0; ExpandBuf* pReq = NULL; JdwpSuspendPolicy suspend_policy = SP_NONE; { MutexLock mu(Thread::Current(), event_list_lock_); JdwpEvent** match_list = AllocMatchList(event_list_size_); if ((eventFlags & Dbg::kBreakpoint) != 0) { - FindMatchingEvents(EK_BREAKPOINT, &basket, match_list, &match_count); + FindMatchingEvents(EK_BREAKPOINT, basket, match_list, &match_count); } if ((eventFlags & Dbg::kSingleStep) != 0) { - FindMatchingEvents(EK_SINGLE_STEP, &basket, match_list, &match_count); + FindMatchingEvents(EK_SINGLE_STEP, basket, match_list, &match_count); } if ((eventFlags & Dbg::kMethodEntry) != 0) { - FindMatchingEvents(EK_METHOD_ENTRY, &basket, match_list, &match_count); + FindMatchingEvents(EK_METHOD_ENTRY, basket, match_list, &match_count); } if ((eventFlags & Dbg::kMethodExit) != 0) { - FindMatchingEvents(EK_METHOD_EXIT, &basket, match_list, &match_count); - FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count); + FindMatchingEvents(EK_METHOD_EXIT, basket, match_list, &match_count); + FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, basket, match_list, &match_count); } if (match_count != 0) { - VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) " - << basket.className << "." 
<< Dbg::GetMethodName(pLoc->method_id) - << StringPrintf(" thread=%#" PRIx64 " dex_pc=%#" PRIx64 ")", - basket.threadId, pLoc->dex_pc); - suspend_policy = scanSuspendPolicy(match_list, match_count); - VLOG(jdwp) << " suspend_policy=" << suspend_policy; + + if (VLOG_IS_ON(jdwp)) { + LogMatchingEventsAndThread(match_list, match_count, basket); + VLOG(jdwp) << " location=" << *pLoc; + VLOG(jdwp) << StringPrintf(" this=%#" PRIx64, basket.thisPtr); + VLOG(jdwp) << " suspend_policy=" << suspend_policy; + } pReq = eventPrep(); expandBufAdd1(pReq, suspend_policy); expandBufAdd4BE(pReq, match_count); - for (int i = 0; i < match_count; i++) { + for (size_t i = 0; i < match_count; i++) { expandBufAdd1(pReq, match_list[i]->eventKind); expandBufAdd4BE(pReq, match_list[i]->requestId); expandBufAdd8BE(pReq, basket.threadId); @@ -892,6 +907,8 @@ bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, Field basket.fieldTypeID = typeId; basket.fieldId = fieldId; + DCHECK_EQ(fieldValue != nullptr, is_modification); + if (InvokeInProgress()) { VLOG(jdwp) << "Not posting field event during invoke"; return false; @@ -912,7 +929,7 @@ bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, Field return false; } - int match_count = 0; + size_t match_count = 0; ExpandBuf* pReq = NULL; JdwpSuspendPolicy suspend_policy = SP_NONE; { @@ -920,24 +937,29 @@ bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, Field JdwpEvent** match_list = AllocMatchList(event_list_size_); if (is_modification) { - FindMatchingEvents(EK_FIELD_MODIFICATION, &basket, match_list, &match_count); + FindMatchingEvents(EK_FIELD_MODIFICATION, basket, match_list, &match_count); } else { - FindMatchingEvents(EK_FIELD_ACCESS, &basket, match_list, &match_count); + FindMatchingEvents(EK_FIELD_ACCESS, basket, match_list, &match_count); } if (match_count != 0) { - VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) " - << basket.className << "." << Dbg::GetMethodName(pLoc->method_id) - << StringPrintf(" thread=%#" PRIx64 " dex_pc=%#" PRIx64 ")", - basket.threadId, pLoc->dex_pc); - suspend_policy = scanSuspendPolicy(match_list, match_count); - VLOG(jdwp) << " suspend_policy=" << suspend_policy; + + if (VLOG_IS_ON(jdwp)) { + LogMatchingEventsAndThread(match_list, match_count, basket); + VLOG(jdwp) << " location=" << *pLoc; + VLOG(jdwp) << StringPrintf(" this=%#" PRIx64, basket.thisPtr); + VLOG(jdwp) << StringPrintf(" type=%#" PRIx64, basket.fieldTypeID) << " " + << Dbg::GetClassName(basket.fieldTypeID); + VLOG(jdwp) << StringPrintf(" field=%#" PRIx32, basket.fieldId) << " " + << Dbg::GetFieldName(basket.fieldId); + VLOG(jdwp) << " suspend_policy=" << suspend_policy; + } pReq = eventPrep(); expandBufAdd1(pReq, suspend_policy); expandBufAdd4BE(pReq, match_count); - for (int i = 0; i < match_count; i++) { + for (size_t i = 0; i < match_count; i++) { expandBufAdd1(pReq, match_list[i]->eventKind); expandBufAdd4BE(pReq, match_list[i]->requestId); expandBufAdd8BE(pReq, basket.threadId); @@ -984,30 +1006,31 @@ bool JdwpState::PostThreadChange(ObjectId threadId, bool start) { ExpandBuf* pReq = NULL; JdwpSuspendPolicy suspend_policy = SP_NONE; - int match_count = 0; + size_t match_count = 0; { // Don't allow the list to be updated while we scan it. 
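The PostLocationEvent/PostFieldEvent hunks above move the detailed event logging under if (VLOG_IS_ON(jdwp)), so class, field, and thread names are only looked up when verbose JDWP logging is enabled. A sketch of the general pattern, with a hypothetical flag standing in for ART's VLOG machinery:

    // Sketch: build multi-line diagnostics only when verbose logging is enabled.
    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>

    static bool g_verbose_jdwp = false;   // Hypothetical stand-in for VLOG_IS_ON(jdwp).

    std::string DescribeEventExpensively(uint32_t request_id) {
      std::ostringstream os;              // Imagine class/field/thread name lookups here.
      os << "event requestId=0x" << std::hex << request_id;
      return os.str();
    }

    void PostEvent(uint32_t request_id) {
      if (g_verbose_jdwp) {               // Skip all of the string building when logging is off.
        std::cerr << DescribeEventExpensively(request_id) << '\n';
      }
      // ... assemble and send the event reply regardless of logging ...
    }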
MutexLock mu(Thread::Current(), event_list_lock_); JdwpEvent** match_list = AllocMatchList(event_list_size_); if (start) { - FindMatchingEvents(EK_THREAD_START, &basket, match_list, &match_count); + FindMatchingEvents(EK_THREAD_START, basket, match_list, &match_count); } else { - FindMatchingEvents(EK_THREAD_DEATH, &basket, match_list, &match_count); + FindMatchingEvents(EK_THREAD_DEATH, basket, match_list, &match_count); } if (match_count != 0) { - VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) " - << StringPrintf("thread=%#" PRIx64, basket.threadId) << ")"; - suspend_policy = scanSuspendPolicy(match_list, match_count); - VLOG(jdwp) << " suspend_policy=" << suspend_policy; + + if (VLOG_IS_ON(jdwp)) { + LogMatchingEventsAndThread(match_list, match_count, basket); + VLOG(jdwp) << " suspend_policy=" << suspend_policy; + } pReq = eventPrep(); expandBufAdd1(pReq, suspend_policy); expandBufAdd4BE(pReq, match_count); - for (int i = 0; i < match_count; i++) { + for (size_t i = 0; i < match_count; i++) { expandBufAdd1(pReq, match_list[i]->eventKind); expandBufAdd4BE(pReq, match_list[i]->requestId); expandBufAdd8BE(pReq, basket.threadId); @@ -1072,33 +1095,35 @@ bool JdwpState::PostException(const JdwpLocation* pThrowLoc, return false; } - int match_count = 0; + size_t match_count = 0; ExpandBuf* pReq = NULL; JdwpSuspendPolicy suspend_policy = SP_NONE; { MutexLock mu(Thread::Current(), event_list_lock_); JdwpEvent** match_list = AllocMatchList(event_list_size_); - FindMatchingEvents(EK_EXCEPTION, &basket, match_list, &match_count); + FindMatchingEvents(EK_EXCEPTION, basket, match_list, &match_count); if (match_count != 0) { - VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total)" - << StringPrintf(" thread=%#" PRIx64, basket.threadId) - << StringPrintf(" exceptId=%#" PRIx64, exceptionId) - << " caught=" << basket.caught << ")" - << " throw: " << *pThrowLoc; - if (pCatchLoc->class_id == 0) { - VLOG(jdwp) << " catch: (not caught)"; - } else { - VLOG(jdwp) << " catch: " << *pCatchLoc; - } - suspend_policy = scanSuspendPolicy(match_list, match_count); - VLOG(jdwp) << " suspend_policy=" << suspend_policy; + + if (VLOG_IS_ON(jdwp)) { + LogMatchingEventsAndThread(match_list, match_count, basket); + VLOG(jdwp) << " throwLocation=" << *pThrowLoc; + if (pCatchLoc->class_id == 0) { + VLOG(jdwp) << " catchLocation=uncaught"; + } else { + VLOG(jdwp) << " catchLocation=" << *pCatchLoc; + } + VLOG(jdwp) << StringPrintf(" this=%#" PRIx64, basket.thisPtr); + VLOG(jdwp) << StringPrintf(" exceptionClass=%#" PRIx64, basket.excepClassId) << " " + << Dbg::GetClassName(basket.excepClassId); + VLOG(jdwp) << " suspend_policy=" << suspend_policy; + } pReq = eventPrep(); expandBufAdd1(pReq, suspend_policy); expandBufAdd4BE(pReq, match_count); - for (int i = 0; i < match_count; i++) { + for (size_t i = 0; i < match_count; i++) { expandBufAdd1(pReq, match_list[i]->eventKind); expandBufAdd4BE(pReq, match_list[i]->requestId); expandBufAdd8BE(pReq, basket.threadId); @@ -1142,17 +1167,19 @@ bool JdwpState::PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std ExpandBuf* pReq = NULL; JdwpSuspendPolicy suspend_policy = SP_NONE; - int match_count = 0; + size_t match_count = 0; { MutexLock mu(Thread::Current(), event_list_lock_); JdwpEvent** match_list = AllocMatchList(event_list_size_); - FindMatchingEvents(EK_CLASS_PREPARE, &basket, match_list, &match_count); + FindMatchingEvents(EK_CLASS_PREPARE, basket, match_list, &match_count); if (match_count != 
0) { - VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) " - << StringPrintf("thread=%#" PRIx64, basket.threadId) << ") " << signature; - suspend_policy = scanSuspendPolicy(match_list, match_count); - VLOG(jdwp) << " suspend_policy=" << suspend_policy; + + if (VLOG_IS_ON(jdwp)) { + LogMatchingEventsAndThread(match_list, match_count, basket); + VLOG(jdwp) << StringPrintf(" type=%#" PRIx64, basket.classId)<< " " << signature; + VLOG(jdwp) << " suspend_policy=" << suspend_policy; + } if (basket.threadId == debug_thread_id_) { /* @@ -1171,7 +1198,7 @@ bool JdwpState::PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std expandBufAdd1(pReq, suspend_policy); expandBufAdd4BE(pReq, match_count); - for (int i = 0; i < match_count; i++) { + for (size_t i = 0; i < match_count; i++) { expandBufAdd1(pReq, match_list[i]->eventKind); expandBufAdd4BE(pReq, match_list[i]->requestId); expandBufAdd8BE(pReq, basket.threadId); diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 81a86235ec..8d987dfd9a 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -34,6 +34,13 @@ #ifdef USE_ASHMEM #include <cutils/ashmem.h> +#ifndef ANDROID_OS +#include <sys/resource.h> +#endif +#endif + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON #endif namespace art { @@ -179,20 +186,32 @@ MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count } size_t page_aligned_byte_count = RoundUp(byte_count, kPageSize); + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + ScopedFd fd(-1); + #ifdef USE_ASHMEM - // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are - // prefixed "dalvik-". - std::string debug_friendly_name("dalvik-"); - debug_friendly_name += name; - ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count)); - if (fd.get() == -1) { - *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno)); - return nullptr; - } - int flags = MAP_PRIVATE; +#ifdef HAVE_ANDROID_OS + const bool use_ashmem = true; #else - ScopedFd fd(-1); - int flags = MAP_PRIVATE | MAP_ANONYMOUS; + // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't + // fail due to ulimit restrictions. If they will then use a regular mmap. + struct rlimit rlimit_fsize; + CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0); + const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) || + (page_aligned_byte_count < rlimit_fsize.rlim_cur); +#endif + if (use_ashmem) { + // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are + // prefixed "dalvik-". 
+ std::string debug_friendly_name("dalvik-"); + debug_friendly_name += name; + fd.reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count)); + if (fd.get() == -1) { + *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno)); + return nullptr; + } + flags = MAP_PRIVATE; + } #endif // We need to store and potentially set an error number for pretty printing of errors diff --git a/runtime/monitor.cc b/runtime/monitor.cc index a19445b189..999a9e504b 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -1115,20 +1115,29 @@ void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) { } } +struct MonitorDeflateArgs { + MonitorDeflateArgs() : self(Thread::Current()), deflate_count(0) {} + Thread* const self; + size_t deflate_count; +}; + static mirror::Object* MonitorDeflateCallback(mirror::Object* object, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (Monitor::Deflate(reinterpret_cast<Thread*>(arg), object)) { + MonitorDeflateArgs* args = reinterpret_cast<MonitorDeflateArgs*>(arg); + if (Monitor::Deflate(args->self, object)) { DCHECK_NE(object->GetLockWord(true).GetState(), LockWord::kFatLocked); + ++args->deflate_count; // If we deflated, return nullptr so that the monitor gets removed from the array. return nullptr; } return object; // Monitor was not deflated. } -void MonitorList::DeflateMonitors() { - Thread* self = Thread::Current(); - Locks::mutator_lock_->AssertExclusiveHeld(self); - SweepMonitorList(MonitorDeflateCallback, reinterpret_cast<Thread*>(self)); +size_t MonitorList::DeflateMonitors() { + MonitorDeflateArgs args; + Locks::mutator_lock_->AssertExclusiveHeld(args.self); + SweepMonitorList(MonitorDeflateCallback, &args); + return args.deflate_count; } MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) { diff --git a/runtime/monitor.h b/runtime/monitor.h index a28823d184..d7552a3fd5 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -229,7 +229,8 @@ class MonitorList { LOCKS_EXCLUDED(monitor_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); - void DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_) + // Returns how many monitors were deflated. 
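The MonitorDeflateCallback change above threads a small MonitorDeflateArgs struct through the void* callback argument so the callback can count deflations as well as reach the current Thread. A generic sketch of that pattern (element types and names are illustrative, not ART's):

    // Sketch: pass a context struct through a void* callback argument so the
    // callback can read inputs and accumulate a result.
    #include <cstddef>
    #include <vector>

    struct DeflateContext {
      int owner_id = 0;          // Input the callback needs (stands in for Thread*).
      size_t deflate_count = 0;  // Output accumulated across calls.
    };

    // Callback shape fixed by the sweeping code: element in, element (or null) out.
    static int* DeflateCallback(int* element, void* arg) {
      DeflateContext* ctx = static_cast<DeflateContext*>(arg);
      if (*element % 2 == 0) {   // Pretend even elements can be "deflated".
        ++ctx->deflate_count;
        return nullptr;          // Null tells the caller to drop the entry.
      }
      return element;            // Unchanged entries stay in the list.
    }

    size_t DeflateAll(std::vector<int*>& list) {
      DeflateContext ctx;
      for (int*& element : list) {
        element = DeflateCallback(element, &ctx);
      }
      return ctx.deflate_count;  // The caller can now report how many were deflated.
    }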
+ size_t DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); private: diff --git a/runtime/utils.cc b/runtime/utils.cc index f60f795e18..e5b8b224df 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -468,11 +468,12 @@ std::string PrettySize(int64_t byte_count) { negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]); } -std::string PrettyDuration(uint64_t nano_duration) { +std::string PrettyDuration(uint64_t nano_duration, size_t max_fraction_digits) { if (nano_duration == 0) { return "0"; } else { - return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration)); + return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration), + max_fraction_digits); } } @@ -509,45 +510,41 @@ uint64_t GetNsToTimeUnitDivisor(TimeUnit time_unit) { return 0; } -std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit) { - const char* unit = NULL; +std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit, + size_t max_fraction_digits) { + const char* unit = nullptr; uint64_t divisor = GetNsToTimeUnitDivisor(time_unit); - uint32_t zero_fill = 1; switch (time_unit) { case kTimeUnitSecond: unit = "s"; - zero_fill = 9; break; case kTimeUnitMillisecond: unit = "ms"; - zero_fill = 6; break; case kTimeUnitMicrosecond: unit = "us"; - zero_fill = 3; break; case kTimeUnitNanosecond: unit = "ns"; - zero_fill = 0; break; } - - uint64_t whole_part = nano_duration / divisor; + const uint64_t whole_part = nano_duration / divisor; uint64_t fractional_part = nano_duration % divisor; if (fractional_part == 0) { return StringPrintf("%" PRIu64 "%s", whole_part, unit); } else { - while ((fractional_part % 1000) == 0) { - zero_fill -= 3; - fractional_part /= 1000; - } - if (zero_fill == 3) { - return StringPrintf("%" PRIu64 ".%03" PRIu64 "%s", whole_part, fractional_part, unit); - } else if (zero_fill == 6) { - return StringPrintf("%" PRIu64 ".%06" PRIu64 "%s", whole_part, fractional_part, unit); - } else { - return StringPrintf("%" PRIu64 ".%09" PRIu64 "%s", whole_part, fractional_part, unit); + static constexpr size_t kMaxDigits = 30; + char fraction_buffer[kMaxDigits]; + char* ptr = fraction_buffer; + uint64_t multiplier = 10; + // This infinite loops if fractional part is 0. + while (fractional_part * multiplier < divisor) { + multiplier *= 10; + *ptr++ = '0'; } + sprintf(ptr, "%" PRIu64, fractional_part); + fraction_buffer[std::min(kMaxDigits - 1, max_fraction_digits)] = '\0'; + return StringPrintf("%" PRIu64 ".%s%s", whole_part, fraction_buffer, unit); } } diff --git a/runtime/utils.h b/runtime/utils.h index 6d52459ec8..a61d30fb43 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -265,10 +265,11 @@ std::string PrettySize(int64_t size_in_bytes); // Returns a human-readable time string which prints every nanosecond while trying to limit the // number of trailing zeros. Prints using the largest human readable unit up to a second. // e.g. "1ms", "1.000000001s", "1.001us" -std::string PrettyDuration(uint64_t nano_duration); +std::string PrettyDuration(uint64_t nano_duration, size_t max_fraction_digits = 3); // Format a nanosecond time to specified units. -std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit); +std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit, + size_t max_fraction_digits); // Get the appropriate unit for a nanosecond duration. 
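The FormatDuration rewrite above replaces the fixed zero_fill widths with a computed run of leading zeros followed by truncation to max_fraction_digits. A standalone sketch of the same idea (simplified, not the exact ART implementation):

    // Sketch: format "whole.fraction<unit>" keeping the fraction's leading zeros and
    // truncating it to at most max_fraction_digits.
    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <string>

    std::string FormatWithFraction(uint64_t nanos, uint64_t ns_per_unit,
                                   const char* unit, size_t max_fraction_digits) {
      const uint64_t whole = nanos / ns_per_unit;
      const uint64_t frac = nanos % ns_per_unit;
      if (frac == 0 || max_fraction_digits == 0) {
        return std::to_string(whole) + unit;      // Avoids looping on a zero fraction.
      }
      std::string fraction;
      // Count the fraction's leading zeros relative to the divisor, e.g. for
      // 1s + 1us: frac = 1000 ns, divisor = 1e9, so the full fraction is "000001000".
      for (uint64_t scaled = frac * 10; scaled < ns_per_unit; scaled *= 10) {
        fraction += '0';
      }
      fraction += std::to_string(frac);
      fraction.resize(std::min(fraction.size(), max_fraction_digits));  // Truncate, no rounding.
      return std::to_string(whole) + "." + fraction + unit;
    }

    // FormatWithFraction(1000001000, 1000000000, "s", 6) -> "1.000001s"
    // FormatWithFraction(1001000, 1000000, "ms", 3)      -> "1.001ms"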
TimeUnit GetAppropriateTimeUnit(uint64_t nano_duration); diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc index 4a1e477bbd..7cd5980c44 100644 --- a/runtime/utils_test.cc +++ b/runtime/utils_test.cc @@ -171,14 +171,15 @@ TEST_F(UtilsTest, PrettyDuration) { EXPECT_EQ("10s", PrettyDuration(10 * one_sec)); EXPECT_EQ("100s", PrettyDuration(100 * one_sec)); EXPECT_EQ("1.001s", PrettyDuration(1 * one_sec + one_ms)); - EXPECT_EQ("1.000001s", PrettyDuration(1 * one_sec + one_us)); - EXPECT_EQ("1.000000001s", PrettyDuration(1 * one_sec + 1)); + EXPECT_EQ("1.000001s", PrettyDuration(1 * one_sec + one_us, 6)); + EXPECT_EQ("1.000000001s", PrettyDuration(1 * one_sec + 1, 9)); + EXPECT_EQ("1.000s", PrettyDuration(1 * one_sec + one_us, 3)); EXPECT_EQ("1ms", PrettyDuration(1 * one_ms)); EXPECT_EQ("10ms", PrettyDuration(10 * one_ms)); EXPECT_EQ("100ms", PrettyDuration(100 * one_ms)); EXPECT_EQ("1.001ms", PrettyDuration(1 * one_ms + one_us)); - EXPECT_EQ("1.000001ms", PrettyDuration(1 * one_ms + 1)); + EXPECT_EQ("1.000001ms", PrettyDuration(1 * one_ms + 1, 6)); EXPECT_EQ("1us", PrettyDuration(1 * one_us)); EXPECT_EQ("10us", PrettyDuration(10 * one_us)); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index f8e75ea850..89cfcdd1de 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -717,13 +717,28 @@ bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_of case Instruction::kVerifySwitchTargets: result = result && CheckSwitchTargets(code_offset); break; + case Instruction::kVerifyVarArgNonZero: + // Fall-through. case Instruction::kVerifyVarArg: { + if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && inst->VRegA() <= 0) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in " + "non-range invoke"; + return false; + } uint32_t args[Instruction::kMaxVarArgRegs]; inst->GetVarArgs(args); result = result && CheckVarArgRegs(inst->VRegA(), args); break; } + case Instruction::kVerifyVarArgRangeNonZero: + // Fall-through. 
case Instruction::kVerifyVarArgRange: + if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgRangeNonZero && + inst->VRegA() <= 0) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in " + "range invoke"; + return false; + } result = result && CheckVarArgRangeRegs(inst->VRegA(), inst->VRegC()); break; case Instruction::kVerifyError: diff --git a/test/003-omnibus-opcodes/build b/test/003-omnibus-opcodes/build index 9dff837ab6..f909fb2219 100644 --- a/test/003-omnibus-opcodes/build +++ b/test/003-omnibus-opcodes/build @@ -22,5 +22,5 @@ ${JAVAC} -d classes `find src -name '*.java'` rm classes/UnresClass.class ${JAVAC} -d classes `find src2 -name '*.java'` -${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes +${DX} -JXmx256m --debug --dex --output=classes.dex classes zip $TEST_NAME.jar classes.dex diff --git a/test/056-const-string-jumbo/build b/test/056-const-string-jumbo/build index a12c9d32b5..ef286d140e 100644 --- a/test/056-const-string-jumbo/build +++ b/test/056-const-string-jumbo/build @@ -42,5 +42,5 @@ function writeFile(name, start, end) { mkdir classes ${JAVAC} -d classes src/*.java -${DX} -JXmx500m --debug --dex --no-optimize --positions=none --no-locals --dump-to=classes.lst --output=classes.dex classes +${DX} -JXmx500m --debug --dex --no-optimize --positions=none --no-locals --output=classes.dex classes zip $TEST_NAME.jar classes.dex diff --git a/test/302-float-conversion/expected.txt b/test/302-float-conversion/expected.txt index 7d5c1eba62..04230761af 100644 --- a/test/302-float-conversion/expected.txt +++ b/test/302-float-conversion/expected.txt @@ -1,2 +1,3 @@ Iteration Result is as expected inter4:2.0 +max_long:9223372036854775807 diff --git a/test/302-float-conversion/src/Main.java b/test/302-float-conversion/src/Main.java index afc5e976d9..27331353f0 100644 --- a/test/302-float-conversion/src/Main.java +++ b/test/302-float-conversion/src/Main.java @@ -21,6 +21,7 @@ public class Main { public static void main(String args[]) { test1(); test2(); + test3(); } public static void test1() { @@ -55,4 +56,9 @@ public class Main { System.out.println("inter4:" + inter4); } + public static void test3() { + double d = Long.MAX_VALUE; + System.out.println("max_long:" + (long)d); + } + } diff --git a/test/303-verification-stress/build b/test/303-verification-stress/build index 2ef9beafd1..c1935d2e9b 100644 --- a/test/303-verification-stress/build +++ b/test/303-verification-stress/build @@ -24,5 +24,5 @@ gcc -o classes-gen classes-gen.c mkdir classes ${JAVAC} -d classes src/*.java -${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes +${DX} --debug --dex --output=classes.dex classes zip $TEST_NAME.jar classes.dex diff --git a/test/run-test b/test/run-test index 34b06cc3a1..d1c5bb2360 100755 --- a/test/run-test +++ b/test/run-test @@ -298,6 +298,17 @@ chmod 755 "$run" export TEST_NAME=`basename ${test_dir}` +# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB. +file_size_limit=2048 +if echo "$test_dir" | grep 089; then + file_size_limit=5120 +elif echo "$test_dir" | grep 083; then + file_size_limit=5120 +fi +if ! 
ulimit "$file_size_limit"; then + echo "ulimit file size setting failed" +fi + good="no" if [ "$dev_mode" = "yes" ]; then "./${build}" 2>&1 @@ -376,7 +387,7 @@ fi echo '#################### info' cat "${td_info}" | sed 's/^/# /g' echo '#################### diffs' - diff --strip-trailing-cr -u "$expected" "$output" + diff --strip-trailing-cr -u "$expected" "$output" | tail -n 500 echo '####################' echo ' ' fi |