30 files changed, 2057 insertions, 416 deletions
diff --git a/build/art.go b/build/art.go index 26794a0a9a..3aaa3eee21 100644 --- a/build/art.go +++ b/build/art.go @@ -54,6 +54,8 @@ func globalFlags(ctx android.LoadHookContext) ([]string, []string) { cflags = append(cflags, "-DART_USE_CXX_INTERPRETER=1") } + // TODO: deprecate and then eventually remove ART_USE_GENERATIONAL_CC in favor of + // ART_USE_GENERATIONAL_GC if !ctx.Config().IsEnvFalse("ART_USE_READ_BARRIER") && ctx.Config().ArtUseReadBarrier() { // Used to change the read barrier type. Valid values are BAKER, TABLELOOKUP. // The default is BAKER. @@ -65,8 +67,9 @@ func globalFlags(ctx android.LoadHookContext) ([]string, []string) { "-DART_USE_READ_BARRIER=1", "-DART_READ_BARRIER_TYPE_IS_"+barrierType+"=1") - if !ctx.Config().IsEnvFalse("ART_USE_GENERATIONAL_CC") { - cflags = append(cflags, "-DART_USE_GENERATIONAL_CC=1") + if !(ctx.Config().IsEnvFalse("ART_USE_GENERATIONAL_CC") || + ctx.Config().IsEnvFalse("ART_USE_GENERATIONAL_GC")) { + cflags = append(cflags, "-DART_USE_GENERATIONAL_GC=1") } // Force CC only if ART_USE_READ_BARRIER was set to true explicitly during // build time. @@ -76,6 +79,10 @@ func globalFlags(ctx android.LoadHookContext) ([]string, []string) { tlab = true } else if gcType == "CMC" { tlab = true + if !(ctx.Config().IsEnvFalse("ART_USE_GENERATIONAL_CC") || + ctx.Config().IsEnvFalse("ART_USE_GENERATIONAL_GC")) { + cflags = append(cflags, "-DART_USE_GENERATIONAL_GC=1") + } } if tlab { diff --git a/build/flags/art-flags.aconfig b/build/flags/art-flags.aconfig index 825a888330..38ef3fe5dd 100644 --- a/build/flags/art-flags.aconfig +++ b/build/flags/art-flags.aconfig @@ -15,6 +15,16 @@ package: "com.android.art.flags" container: "com.android.art" +# Flag for generational CMC feature +flag { + name: "use_generational_cmc" + namespace: "art_performance" + description: "Flag to control whether CMC's generational logic should be used or not" + bug: "343220989" + is_fixed_read_only: true + is_exported: false +} + # Flag for the experimental feature of on-demand method tracing flag { name: "always_enable_profile_code" diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index fe7a55d559..14a1f89060 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -552,7 +552,7 @@ struct XGcOption { gc::CollectorType collector_type_ = gc::kCollectorTypeDefault; bool verify_pre_gc_heap_ = false; bool verify_pre_sweeping_heap_ = kIsDebugBuild; - bool generational_cc = kEnableGenerationalCCByDefault; + bool generational_gc = kEnableGenerationalGCByDefault; bool verify_post_gc_heap_ = kIsDebugBuild; bool verify_pre_gc_rosalloc_ = kIsDebugBuild; bool verify_pre_sweeping_rosalloc_ = false; @@ -566,7 +566,8 @@ template <> struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> { Result Parse(const std::string& option) { // -Xgc: already stripped XGcOption xgc{}; - + // TODO: Deprecate and eventually remove -Xgc:[no]generational_cc option in + // favor of -Xgc:[no]generational_gc. 
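The parser change that continues below accepts both the legacy and the new option spellings, so existing command lines keep working while the TODO above is carried out. As a standalone illustration of that aliasing pattern only (the names XGcFlags and ParseXGcOption are made up for this sketch and are not ART code):

#include <sstream>
#include <string>
#include <vector>

// Hypothetical holder for the parsed -Xgc flags (sketch only).
struct XGcFlags {
  bool generational_gc = true;  // stand-in for kEnableGenerationalGCByDefault
};

// Accept both the legacy spelling (generational_cc) and the new one
// (generational_gc), including their "no" prefixed forms, so that older
// command lines keep parsing after the rename.
bool ParseXGcOption(const std::string& option, XGcFlags* flags) {
  std::stringstream ss(option);
  for (std::string gc_option; std::getline(ss, gc_option, ',');) {
    if (gc_option == "generational_cc" || gc_option == "generational_gc") {
      flags->generational_gc = true;
    } else if (gc_option == "nogenerational_cc" || gc_option == "nogenerational_gc") {
      flags->generational_gc = false;
    } else {
      return false;  // unknown token; the real parser handles many more options
    }
  }
  return true;
}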
std::vector<std::string> gc_options; Split(option, ',', &gc_options); for (const std::string& gc_option : gc_options) { @@ -581,20 +582,20 @@ struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> { xgc.verify_pre_sweeping_heap_ = true; } else if (gc_option == "nopresweepingverify") { xgc.verify_pre_sweeping_heap_ = false; - } else if (gc_option == "generational_cc") { - // Note: Option "-Xgc:generational_cc" can be passed directly by + } else if (gc_option == "generational_cc" || gc_option == "generational_gc") { + // Note: Option "-Xgc:generational_gc" can be passed directly by // app_process/zygote (see `android::AndroidRuntime::startVm`). If this // option is ever deprecated, it should still be accepted (but ignored) // for compatibility reasons (this should not prevent the runtime from // starting up). - xgc.generational_cc = true; - } else if (gc_option == "nogenerational_cc") { - // Note: Option "-Xgc:nogenerational_cc" can be passed directly by + xgc.generational_gc = true; + } else if (gc_option == "nogenerational_cc" || gc_option == "nogenerational_gc") { + // Note: Option "-Xgc:nogenerational_gc" can be passed directly by // app_process/zygote (see `android::AndroidRuntime::startVm`). If this // option is ever deprecated, it should still be accepted (but ignored) // for compatibility reasons (this should not prevent the runtime from // starting up). - xgc.generational_cc = false; + xgc.generational_gc = false; } else if (gc_option == "postverify") { xgc.verify_post_gc_heap_ = true; } else if (gc_option == "nopostverify") { diff --git a/compiler/Android.bp b/compiler/Android.bp index 0a1af65a97..c904746435 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -479,6 +479,7 @@ art_cc_defaults { "optimizing/nodes_test.cc", "optimizing/nodes_vector_test.cc", "optimizing/parallel_move_test.cc", + "optimizing/prepare_for_register_allocation_test.cc", "optimizing/pretty_printer_test.cc", "optimizing/reference_type_propagation_test.cc", "optimizing/side_effects_test.cc", diff --git a/compiler/optimizing/code_flow_simplifier_test.cc b/compiler/optimizing/code_flow_simplifier_test.cc index dc4268a0aa..a382f0f6f6 100644 --- a/compiler/optimizing/code_flow_simplifier_test.cc +++ b/compiler/optimizing/code_flow_simplifier_test.cc @@ -71,4 +71,44 @@ TEST_F(CodeFlowSimplifierTest, testSelectWithAdd) { EXPECT_TRUE(phi->GetBlock() == nullptr); } +// Test `HSelect` optimization in an irreducible loop. +TEST_F(CodeFlowSimplifierTest, testSelectInIrreducibleLoop) { + HBasicBlock* return_block = InitEntryMainExitGraphWithReturnVoid(); + auto [split, left_header, right_header, body] = CreateIrreducibleLoop(return_block); + + HParameterValue* split_param = MakeParam(DataType::Type::kBool); + HParameterValue* bool_param = MakeParam(DataType::Type::kBool); + HParameterValue* n_param = MakeParam(DataType::Type::kInt32); + + MakeIf(split, split_param); + + HInstruction* const0 = graph_->GetIntConstant(0); + HInstruction* const1 = graph_->GetIntConstant(1); + HPhi* right_phi = MakePhi(right_header, {const0, /* placeholder */ const0}); + HPhi* left_phi = MakePhi(left_header, {const1, right_phi}); + HAdd* add = MakeBinOp<HAdd>(body, DataType::Type::kInt32, left_phi, const1); + right_phi->ReplaceInput(add, 1u); // Update back-edge input. 
+ HCondition* condition = MakeCondition(left_header, kCondGE, left_phi, n_param); + MakeIf(left_header, condition); + + auto [if_block, then_block, else_block] = CreateDiamondPattern(body, bool_param); + HPhi* phi = MakePhi(body, {const1, const0}); + + EXPECT_TRUE(CheckGraphAndTryCodeFlowSimplifier()); + HLoopInformation* loop_info = left_header->GetLoopInformation(); + ASSERT_TRUE(loop_info != nullptr); + ASSERT_TRUE(loop_info->IsIrreducible()); + + EXPECT_TRUE(phi->GetBlock() == nullptr); + ASSERT_TRUE(if_block->GetFirstInstruction()->IsSelect()); + + ASSERT_EQ(if_block, add->GetBlock()); // Moved when merging blocks. + + for (HBasicBlock* removed_block : {then_block, else_block, body}) { + uint32_t removed_block_id = removed_block->GetBlockId(); + ASSERT_TRUE(removed_block->GetGraph() == nullptr) << removed_block_id; + ASSERT_FALSE(loop_info->GetBlocks().IsBitSet(removed_block_id)) << removed_block_id; + } +} + } // namespace art diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index a0ec99ffc3..a8d487e51a 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -512,6 +512,8 @@ class HGraphVisualizerPrinter final : public HGraphDelegateVisitor { void VisitCondition(HCondition* condition) override { StartAttributeStream("bias") << condition->GetBias(); + StartAttributeStream("emitted_at_use_site") + << std::boolalpha << condition->IsEmittedAtUseSite() << std::noboolalpha; } void VisitIf(HIf* if_instr) override { diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 71c4f7aeeb..491045de99 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -59,6 +59,8 @@ class HX86LoadFromConstantTable final : public HExpression<2> { return InputAt(1)->AsConstant(); } + bool CanBeMoved() const override { return true; } + DECLARE_INSTRUCTION(X86LoadFromConstantTable); protected: diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 018ffce196..8115ea035d 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -389,6 +389,56 @@ class OptimizingUnitTestHelper { return {pre_header, loop}; } + // Insert blocks for an irreducible loop before the `loop_exit`: + // + // <loop_exit's old predecessor> + // | + // split + // / \ + // left_preheader right_preheader + // | | + // left_header <------- right_header <-+ + // | | | + // | +------------> body ------------+ + // | + // loop_exit + // + // Note that `left_preheader`, `right_preheader` and `body` are needed to avoid critical edges. + // + // `HGoto` instructions are added to `left_preheader`, `right_preheader`, `body` + // and `right_header`. To complete the control flow, the caller should add `HIf` + // to `split` and `left_header`. + // + // Returns `{split, left_header, right_header, body}`. 
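For context on the loop shape this helper produces: a loop is irreducible when it can be entered through more than one block, so no single header dominates the whole cycle; here left_header is reachable through left_preheader and right_header through right_preheader. A rough source-level analogy, written only to show the two entry points and not derived from this helper:

// A loop with two entry points: depending on `split`, control enters the
// cycle either at `left_header` or at `right_header`. Neither label
// dominates the whole cycle, which is what makes the loop irreducible.
int IrreducibleLoopAnalogy(bool split, int n) {
  int i;
  if (split) {
    i = 1;
    goto left_header;
  } else {
    i = 0;
    goto right_header;
  }
left_header:
  if (i >= n) {
    return i;      // loop_exit
  }
  i = i + 1;       // `body`
right_header:
  goto left_header;
}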
+ std::tuple<HBasicBlock*, HBasicBlock*, HBasicBlock*, HBasicBlock*> CreateIrreducibleLoop( + HBasicBlock* loop_exit) { + HBasicBlock* split = AddNewBlock(); + HBasicBlock* left_preheader = AddNewBlock(); + HBasicBlock* right_preheader = AddNewBlock(); + HBasicBlock* left_header = AddNewBlock(); + HBasicBlock* right_header = AddNewBlock(); + HBasicBlock* body = AddNewBlock(); + + HBasicBlock* predecessor = loop_exit->GetSinglePredecessor(); + predecessor->ReplaceSuccessor(loop_exit, split); + + split->AddSuccessor(left_preheader); // true successor + split->AddSuccessor(right_preheader); // false successor + left_preheader->AddSuccessor(left_header); + right_preheader->AddSuccessor(right_header); + left_header->AddSuccessor(loop_exit); // true successor + left_header->AddSuccessor(body); // false successor + body->AddSuccessor(right_header); + right_header->AddSuccessor(left_header); + + MakeGoto(left_preheader); + MakeGoto(right_preheader); + MakeGoto(body); + MakeGoto(right_header); + + return {split, left_header, right_header, body}; + } + HBasicBlock* AddNewBlock() { HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 1eb340a9b4..c5dbab5f79 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -42,7 +42,8 @@ class PrepareForRegisterAllocationVisitor final : public HGraphDelegateVisitor { void VisitBoundType(HBoundType* bound_type) override; void VisitArraySet(HArraySet* instruction) override; void VisitClinitCheck(HClinitCheck* check) override; - void VisitCondition(HCondition* condition) override; + void VisitIf(HIf* if_instr) override; + void VisitSelect(HSelect* select) override; void VisitConstructorFence(HConstructorFence* constructor_fence) override; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; void VisitDeoptimize(HDeoptimize* deoptimize) override; @@ -50,6 +51,7 @@ class PrepareForRegisterAllocationVisitor final : public HGraphDelegateVisitor { bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; + void TryToMoveConditionToUser(HInstruction* maybe_condition, HInstruction* user); const CompilerOptions& compiler_options_; }; @@ -108,6 +110,7 @@ void PrepareForRegisterAllocationVisitor::VisitDeoptimize(HDeoptimize* deoptimiz deoptimize->ReplaceWith(deoptimize->GuardedInput()); deoptimize->RemoveGuard(); } + TryToMoveConditionToUser(deoptimize->InputAt(0), deoptimize); } void PrepareForRegisterAllocationVisitor::VisitBoundsCheck(HBoundsCheck* check) { @@ -206,37 +209,114 @@ void PrepareForRegisterAllocationVisitor::VisitClinitCheck(HClinitCheck* check) } } -bool PrepareForRegisterAllocationVisitor::CanEmitConditionAt(HCondition* condition, - HInstruction* user) const { - if (condition->GetNext() != user) { +// Determine if moving `condition` to `user` would observably extend the lifetime of a reference. +// By "observably" we understand that the reference would need to be visible to the GC for longer. +// We're not concerned with the lifetime for the purposes of register allocation here. 
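A distilled model of the check implemented below, under simplified assumptions: plain pointers stand in for the HInstruction operands, and env_refs stands in for the references recorded by the last instruction with an environment between the condition and its user. Moving the condition is treated as harmless only when every non-null reference operand is already kept visible to the GC by that environment:

#include <algorithm>
#include <vector>

// Simplified model (not ART code): `env_refs` are the references recorded in
// the environment of the last safepoint between the condition and its user.
// Moving the condition past that point is harmless only if every non-null
// reference operand is already in `env_refs`.
bool MoveWouldExtendReferenceLifetime(const void* lhs,
                                      const void* rhs,
                                      const std::vector<const void*>& env_refs) {
  auto extends = [&](const void* ref) {
    return ref != nullptr &&
           std::find(env_refs.begin(), env_refs.end(), ref) == env_refs.end();
  };
  return extends(lhs) || extends(rhs);
}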
+static bool ConditionMoveWouldExtendReferenceLifetime(HCondition* condition, HInstruction* user) { + HInstruction* lhs = condition->InputAt(0); + if (lhs->GetType() != DataType::Type::kReference) { + return false; + } + HInstruction* rhs = condition->InputAt(1); + DCHECK_EQ(rhs->GetType(), DataType::Type::kReference); + if (lhs->IsNullConstant() && rhs->IsNullConstant()) { + return false; + } + // Check if the last instruction with environment before `user` has all non-null + // inputs in the environment. If so, we would not be extending the lifetime. + HInstruction* instruction_with_env = user->GetPrevious(); + while (instruction_with_env != nullptr && + instruction_with_env != condition && + instruction_with_env->GetEnvironment() == nullptr) { + DCHECK(!instruction_with_env->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + instruction_with_env = instruction_with_env->GetPrevious(); + } + if (instruction_with_env == nullptr) { + // No env use in the user's block. Do not search other blocks. Conservatively assume that + // moving the `condition` to the `user` would indeed extend the lifetime of a reference. + return true; + } + if (instruction_with_env == condition) { + // There is no instruction with an environment between `condition` and `user`, so moving + // the condition before the user shall not observably extend the lifetime of the reference. return false; } + DCHECK(instruction_with_env->HasEnvironment()); + auto env_inputs = instruction_with_env->GetEnvironment()->GetEnvInputs(); + auto extends_lifetime = [&](HInstruction* instruction) { + return !instruction->IsNullConstant() && + std::find(env_inputs.begin(), env_inputs.end(), instruction) == env_inputs.end(); + }; + return extends_lifetime(lhs) || extends_lifetime(rhs); +} + +bool PrepareForRegisterAllocationVisitor::CanEmitConditionAt(HCondition* condition, + HInstruction* user) const { + DCHECK(user->IsIf() || user->IsDeoptimize() || user->IsSelect()); if (GetGraph()->IsCompilingBaseline() && compiler_options_.ProfileBranches()) { // To do branch profiling, we cannot emit conditions at use site. return false; } - if (user->IsIf() || user->IsDeoptimize()) { - return true; + // Move only a single-user `HCondition` to the `user`. + if (!condition->HasOnlyOneNonEnvironmentUse()) { + return false; } + DCHECK(condition->GetUses().front().GetUser() == user); - if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) { - return true; + if (condition->GetNext() != user) { + // Avoid moving across blocks if the graph has any irreducible loops. + if (condition->GetBlock() != user->GetBlock() && GetGraph()->HasIrreducibleLoops()) { + return false; + } + // Avoid extending the lifetime of references by moving the condition. 
+ if (ConditionMoveWouldExtendReferenceLifetime(condition, user)) { + return false; + } } - return false; + return true; } -void PrepareForRegisterAllocationVisitor::VisitCondition(HCondition* condition) { - if (condition->HasOnlyOneNonEnvironmentUse()) { - HInstruction* user = condition->GetUses().front().GetUser(); - if (CanEmitConditionAt(condition, user)) { - condition->MarkEmittedAtUseSite(); +void PrepareForRegisterAllocationVisitor::TryToMoveConditionToUser(HInstruction* maybe_condition, + HInstruction* user) { + DCHECK(user->IsIf() || user->IsDeoptimize() || user->IsSelect()); + if (maybe_condition->IsCondition() && CanEmitConditionAt(maybe_condition->AsCondition(), user)) { + if (maybe_condition->GetNext() != user) { + maybe_condition->MoveBefore(user); +#ifdef ART_ENABLE_CODEGEN_x86 + for (HInstruction* input : maybe_condition->GetInputs()) { + if (input->IsEmittedAtUseSite()) { + DCHECK(input->IsX86LoadFromConstantTable()); + input->MoveBefore(maybe_condition); + HInstruction* inputs_input = input->InputAt(0); + DCHECK(inputs_input->IsX86ComputeBaseMethodAddress()); + if (inputs_input->HasOnlyOneNonEnvironmentUse()) { + inputs_input->MoveBefore(input); + } + } + } +#else // ART_ENABLE_CODEGEN_x86 + if (kIsDebugBuild) { + for (HInstruction* input : maybe_condition->GetInputs()) { + CHECK(!input->IsEmittedAtUseSite()) << input->DebugName() << "#" << input->GetId(); + } + } +#endif } + maybe_condition->MarkEmittedAtUseSite(); } } +void PrepareForRegisterAllocationVisitor::VisitIf(HIf* if_instr) { + TryToMoveConditionToUser(if_instr->InputAt(0), if_instr); +} + +void PrepareForRegisterAllocationVisitor::VisitSelect(HSelect* select) { + TryToMoveConditionToUser(select->GetCondition(), select); +} + void PrepareForRegisterAllocationVisitor::VisitConstructorFence( HConstructorFence* constructor_fence) { // Trivially remove redundant HConstructorFence when it immediately follows an HNewInstance diff --git a/compiler/optimizing/prepare_for_register_allocation_test.cc b/compiler/optimizing/prepare_for_register_allocation_test.cc new file mode 100644 index 0000000000..a5bbae19a2 --- /dev/null +++ b/compiler/optimizing/prepare_for_register_allocation_test.cc @@ -0,0 +1,312 @@ +/* + * Copyright (C) 2025 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "prepare_for_register_allocation.h" + +#include <gtest/gtest.h> + +#include "base/macros.h" +#include "optimizing_unit_test.h" + +namespace art HIDDEN { + +class PrepareForRegisterAllocationTest + : public CommonCompilerTest, public OptimizingUnitTestHelper { + protected: + void RunPass() { + graph_->BuildDominatorTree(); + PrepareForRegisterAllocation(graph_, *compiler_options_).Run(); + } +}; + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionToSelect) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(ret, kCondLT, param, zero_const); + HSelect* select = MakeSelect(ret, condition, zero_const, param); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), select); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionToDeoptimize) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(ret, kCondLT, param, zero_const); + HDeoptimize* deopt = new (GetAllocator()) HDeoptimize( + GetAllocator(), condition, DeoptimizationKind::kAotInlineCache, /*dex_pc=*/ 0u); + AddOrInsertInstruction(ret, deopt); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), deopt); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionToIf) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + auto [start, left, right] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(start, kCondLT, param, zero_const); + HIf* start_if = MakeIf(start, condition); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionToIfWithMove) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + auto [start, left, right] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(start, kCondLT, param, zero_const); + HInstruction* add = MakeBinOp<HAdd>(start, DataType::Type::kInt32, param, param); + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), add); + ASSERT_EQ(add->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(add->GetNext(), condition); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionToIfWithMoveFromPredecessor) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + auto [start, left, right_end] = CreateDiamondPattern(ret); + auto [right_start, right_left, right_right] = CreateDiamondPattern(right_end); + + HInstruction* cond_param = MakeParam(DataType::Type::kBool); + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(start, kCondLT, param, zero_const); + MakeIf(start, cond_param); + // Note: The condition for this `HIf` is in the predecessor block. 
+ HIf* right_start_if = MakeIf(right_start, condition); + + ASSERT_NE(condition->GetBlock(), right_start_if->GetBlock()); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetBlock(), right_start_if->GetBlock()); + ASSERT_EQ(condition->GetNext(), right_start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionPreventedByOtherUse) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + auto [start, left, right] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(start, kCondLT, param, zero_const); + HIf* start_if = MakeIf(start, condition); + + // Other use. + MakeBinOp<HAdd>(ret, DataType::Type::kInt32, param, condition); + + RunPass(); + + ASSERT_TRUE(!condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionPreventedByEnvUse) { + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(); + auto [start, left, right] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kInt32); + HInstruction* zero_const = graph_->GetIntConstant(0); + HCondition* condition = MakeCondition(start, kCondLT, param, zero_const); + HIf* start_if = MakeIf(start, condition); + + // Environment use. + MakeInvokeStatic(ret, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {condition}); + + RunPass(); + + ASSERT_TRUE(!condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionPrevented_RefNoEnvInBlock) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + auto [right_start, right_left, right_right] = CreateDiamondPattern(right_end); + + HInstruction* cond_param = MakeParam(DataType::Type::kBool); + HInstruction* param = MakeParam(DataType::Type::kReference); + HInstruction* null_const = graph_->GetNullConstant(); + HCondition* condition = MakeCondition(start, kCondEQ, param, null_const); + MakeIf(start, cond_param); + // Note: The condition for this `HIf` is in the predecessor block. + HIf* right_start_if = MakeIf(right_start, condition); + + RunPass(); + + ASSERT_TRUE(!condition->IsEmittedAtUseSite()); + ASSERT_NE(condition->GetBlock(), right_start_if->GetBlock()); // Not moved to the `HIf`. +} + +TEST_F(PrepareForRegisterAllocationTest, MergeCondition_RefsInEnv) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + + HInstruction* param1 = MakeParam(DataType::Type::kReference); + HInstruction* param2 = MakeParam(DataType::Type::kReference); + HCondition* condition = MakeCondition(start, kCondEQ, param1, param2); + + // This invoke's environment already contains `param1` and `param2`, so reordering + // the `condition` after the invoke would not extend their lifetime for the purpose of GC. 
+ HInvoke* invoke = + MakeInvokeStatic(start, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {param1, param2}); + + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(invoke->GetNext(), condition); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeCondition_RefLhsInEnv) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kReference); + HInstruction* null_const = graph_->GetNullConstant(); + HCondition* condition = MakeCondition(start, kCondEQ, param, null_const); + + // This invoke's environment already contains `param`, so reordering the `condition` + // after the invoke would not extend its lifetime for the purpose of GC. + HInvoke* invoke = MakeInvokeStatic(start, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {param}); + + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(invoke->GetNext(), condition); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeCondition_RefRhsInEnv) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + + HInstruction* param = MakeParam(DataType::Type::kReference); + HInstruction* null_const = graph_->GetNullConstant(); + HCondition* condition = MakeCondition(start, kCondEQ, null_const, param); + + // This invoke's environment already contains `param`, so reordering the `condition` + // after the invoke would not extend its lifetime for the purpose of GC. + HInvoke* invoke = MakeInvokeStatic(start, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {param}); + + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(condition->IsEmittedAtUseSite()); + ASSERT_EQ(invoke->GetNext(), condition); + ASSERT_EQ(condition->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionPrevented_RefLhsNotInEnv) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + + HInstruction* param1 = MakeParam(DataType::Type::kReference); + HInstruction* param2 = MakeParam(DataType::Type::kReference); + HCondition* condition = MakeCondition(start, kCondEQ, param1, param2); + + // This invoke's environment does not contain `param1`, so reordering the `condition` + // after the invoke would need to extend the lifetime of `param1` for the purpose of GC. + // We do not want to extend lifetime of references, therefore the optimization is skipped. 
+ HInvoke* invoke = MakeInvokeStatic(start, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {param2}); + + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(!condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); +} + +TEST_F(PrepareForRegisterAllocationTest, MergeConditionPrevented_RefRhsNotInEnv) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope vshs(soa.Self()); + HBasicBlock* ret = InitEntryMainExitGraphWithReturnVoid(&vshs); + auto [start, left, right_end] = CreateDiamondPattern(ret); + + HInstruction* param1 = MakeParam(DataType::Type::kReference); + HInstruction* param2 = MakeParam(DataType::Type::kReference); + HCondition* condition = MakeCondition(start, kCondEQ, param1, param2); + + // This invoke's environment does not contain `param2`, so reordering the `condition` + // after the invoke would need to extend the lifetime of `param2` for the purpose of GC. + // We do not want to extend lifetime of references, therefore the optimization is skipped. + HInvoke* invoke = MakeInvokeStatic(start, DataType::Type::kVoid, /*args=*/ {}, /*env=*/ {param1}); + + HIf* start_if = MakeIf(start, condition); + + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); + + RunPass(); + + ASSERT_TRUE(!condition->IsEmittedAtUseSite()); + ASSERT_EQ(condition->GetNext(), invoke); + ASSERT_EQ(invoke->GetNext(), start_if); +} + +} // namespace art diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 9185f79ab7..ca126e6454 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -526,9 +526,10 @@ class SCOPED_CAPABILITY MutexLock { // Pretend to acquire a mutex for checking purposes, without actually doing so. Use with // extreme caution when it is known the condition that the mutex would guard against cannot arise. +template <typename T> class SCOPED_CAPABILITY FakeMutexLock { public: - explicit FakeMutexLock(Mutex& mu) ACQUIRE(mu) NO_THREAD_SAFETY_ANALYSIS {} + explicit FakeMutexLock(T& mu) ACQUIRE(mu) NO_THREAD_SAFETY_ANALYSIS {} ~FakeMutexLock() RELEASE() NO_THREAD_SAFETY_ANALYSIS {} diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h index 213836e768..1b060f4cc9 100644 --- a/runtime/gc/accounting/card_table-inl.h +++ b/runtime/gc/accounting/card_table-inl.h @@ -51,11 +51,12 @@ static inline bool byte_cas(uint8_t old_value, uint8_t new_value, uint8_t* addre #endif } -template <bool kClearCard, typename Visitor> +template <bool kClearCard, typename Visitor, typename ModifyVisitor> inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, uint8_t* const scan_begin, uint8_t* const scan_end, const Visitor& visitor, + const ModifyVisitor& mod_visitor, const uint8_t minimum_age) { DCHECK_GE(scan_begin, reinterpret_cast<uint8_t*>(bitmap->HeapBegin())); // scan_end is the byte after the last byte we scan. @@ -69,9 +70,11 @@ inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, // Handle any unaligned cards at the start. 
while (!IsAligned<sizeof(intptr_t)>(card_cur) && card_cur < card_end) { - if (*card_cur >= minimum_age) { + uint8_t cur_val = *card_cur; + if (cur_val >= minimum_age) { uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur)); bitmap->VisitMarkedRange(start, start + kCardSize, visitor); + mod_visitor(card_cur, cur_val); ++cards_scanned; } ++card_cur; @@ -100,11 +103,13 @@ inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, // TODO: Investigate if processing continuous runs of dirty cards with // a single bitmap visit is more efficient. for (size_t i = 0; i < sizeof(uintptr_t); ++i) { - if (static_cast<uint8_t>(start_word) >= minimum_age) { + uint8_t cur_val = static_cast<uint8_t>(start_word); + if (cur_val >= minimum_age) { auto* card = reinterpret_cast<uint8_t*>(word_cur) + i; DCHECK(*card == static_cast<uint8_t>(start_word) || *card == kCardDirty) - << "card " << static_cast<size_t>(*card) << " intptr_t " << (start_word & 0xFF); + << "card " << static_cast<size_t>(*card) << " intptr_t " << cur_val; bitmap->VisitMarkedRange(start, start + kCardSize, visitor); + mod_visitor(card, cur_val); ++cards_scanned; } start_word >>= 8; @@ -116,9 +121,11 @@ inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, // Handle any unaligned cards at the end. card_cur = reinterpret_cast<uint8_t*>(word_end); while (card_cur < card_end) { - if (*card_cur >= minimum_age) { + uint8_t cur_val = *card_cur; + if (cur_val >= minimum_age) { uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur)); bitmap->VisitMarkedRange(start, start + kCardSize, visitor); + mod_visitor(card_cur, cur_val); ++cards_scanned; } ++card_cur; diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index 72cf57119c..98ff107baf 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -21,6 +21,7 @@ #include "base/locks.h" #include "base/mem_map.h" +#include "base/utils.h" #include "runtime_globals.h" namespace art HIDDEN { @@ -49,8 +50,19 @@ class CardTable { static constexpr size_t kCardShift = 10; static constexpr size_t kCardSize = 1 << kCardShift; static constexpr uint8_t kCardClean = 0x0; + // Value written into the card by the write-barrier to indicate that + // reference(s) to some object starting in this card has been modified. static constexpr uint8_t kCardDirty = 0x70; + // Value to indicate that a dirty card is 'aged' now in the sense that it has + // been noticed by the GC and will be visited. static constexpr uint8_t kCardAged = kCardDirty - 1; + // Further ageing an aged card usually means clearing the card as we have + // already visited it when ageing it the first time. This value is used to + // avoid re-visiting (in the second pass of CMC marking phase) cards which + // contain old-to-young references and have not been dirtied since the first + // pass of marking. We can't simply clean these cards as they are needed later + // in compaction phase to update the old-to-young references. + static constexpr uint8_t kCardAged2 = kCardAged - 1; static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity); ~CardTable(); @@ -114,17 +126,33 @@ class CardTable { const Visitor& visitor, const ModifiedVisitor& modified); - // For every dirty at least minumum age between begin and end invoke the visitor with the - // specified argument. Returns how many cards the visitor was run on. 
- template <bool kClearCard, typename Visitor> + // For every dirty (at least minimum age) card between begin and end invoke + // bitmap's VisitMarkedRange() to invoke 'visitor' on every object in the + // card. Calls 'mod_visitor' for each such card in case the caller wants to + // modify the value. Returns how many cards the visitor was run on. + // NOTE: 'visitor' is called on one whole card at a time. Therefore, + // 'scan_begin' and 'scan_end' are aligned to card-size before visitor is + // called. Therefore visitor may get called on objects before 'scan_begin' + // and/or after 'scan_end'. Visitor shall detect that and act appropriately. + template <bool kClearCard, typename Visitor, typename ModifyVisitor> size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap, uint8_t* scan_begin, uint8_t* scan_end, const Visitor& visitor, - const uint8_t minimum_age = kCardDirty) - REQUIRES(Locks::heap_bitmap_lock_) + const ModifyVisitor& mod_visitor, + const uint8_t minimum_age) REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_); + template <bool kClearCard, typename Visitor> + size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap, + uint8_t* scan_begin, + uint8_t* scan_end, + const Visitor& visitor, + const uint8_t minimum_age = kCardDirty) REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + return Scan<kClearCard>(bitmap, scan_begin, scan_end, visitor, VoidFunctor(), minimum_age); + } + // Assertion used to check the given address is covered by the card table void CheckAddrIsInCardTable(const uint8_t* addr) const; @@ -169,7 +197,8 @@ class CardTable { class AgeCardVisitor { public: uint8_t operator()(uint8_t card) const { - return (card == accounting::CardTable::kCardDirty) ? card - 1 : 0; + return (card == accounting::CardTable::kCardDirty) ? 
accounting::CardTable::kCardAged + : accounting::CardTable::kCardClean; } }; diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index 4f9e5a3652..a8dabde716 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -115,6 +115,7 @@ inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, } #else DCHECK_LE(heap_begin_, visit_begin); + DCHECK_LT(visit_begin, HeapLimit()); DCHECK_LE(visit_end, HeapLimit()); const uintptr_t offset_start = visit_begin - heap_begin_; diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index f8eef84f25..0cfa198491 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -323,7 +323,7 @@ void GarbageCollector::SweepArray(accounting::ObjectStack* allocations, StackReference<mirror::Object>* out = objects; for (size_t i = 0; i < count; ++i) { mirror::Object* const obj = objects[i].AsMirrorPtr(); - if (kUseThreadLocalAllocationStack && obj == nullptr) { + if (obj == nullptr) { continue; } if (space->HasAddress(obj)) { diff --git a/runtime/gc/collector/mark_compact-inl.h b/runtime/gc/collector/mark_compact-inl.h index d840223720..70db85e657 100644 --- a/runtime/gc/collector/mark_compact-inl.h +++ b/runtime/gc/collector/mark_compact-inl.h @@ -41,7 +41,8 @@ template <size_t kAlignment> inline uintptr_t MarkCompact::LiveWordsBitmap<kAlignment>::SetLiveWords(uintptr_t begin, size_t size) { const uintptr_t begin_bit_idx = MemRangeBitmap::BitIndexFromAddr(begin); - DCHECK(!Bitmap::TestBit(begin_bit_idx)); + DCHECK(!Bitmap::TestBit(begin_bit_idx)) + << "begin:" << begin << " size:" << size << " begin_bit_idx:" << begin_bit_idx; // Range to set bit: [begin, end] uintptr_t end = begin + size - kAlignment; const uintptr_t end_bit_idx = MemRangeBitmap::BitIndexFromAddr(end); @@ -201,10 +202,10 @@ inline bool MarkCompact::IsOnAllocStack(mirror::Object* ref) { return stack->Contains(ref); } -inline void MarkCompact::UpdateRef(mirror::Object* obj, - MemberOffset offset, - uint8_t* begin, - uint8_t* end) { +inline mirror::Object* MarkCompact::UpdateRef(mirror::Object* obj, + MemberOffset offset, + uint8_t* begin, + uint8_t* end) { mirror::Object* old_ref = obj->GetFieldObject< mirror::Object, kVerifyNone, kWithoutReadBarrier, /*kIsVolatile*/false>(offset); if (kIsDebugBuild) { @@ -240,6 +241,7 @@ inline void MarkCompact::UpdateRef(mirror::Object* obj, offset, new_ref); } + return new_ref; } inline bool MarkCompact::VerifyRootSingleUpdate(void* root, @@ -280,17 +282,17 @@ inline bool MarkCompact::VerifyRootSingleUpdate(void* root, } } DCHECK(reinterpret_cast<uint8_t*>(old_ref) >= black_allocations_begin_ || - live_words_bitmap_->Test(old_ref)) + moving_space_bitmap_->Test(old_ref)) << "ref=" << old_ref << " <" << mirror::Object::PrettyTypeOf(old_ref) << "> RootInfo [" << info << "]"; } return true; } -inline void MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* root, - uint8_t* begin, - uint8_t* end, - const RootInfo& info) { +inline mirror::Object* MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + uint8_t* begin, + uint8_t* end, + const RootInfo& info) { DCHECK(!root->IsNull()); mirror::Object* old_ref = root->AsMirrorPtr(); if (VerifyRootSingleUpdate(root, old_ref, info)) { @@ -298,20 +300,24 @@ inline void MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* if (old_ref != new_ref) { root->Assign(new_ref); } + 
return new_ref; } + return nullptr; } -inline void MarkCompact::UpdateRoot(mirror::Object** root, - uint8_t* begin, - uint8_t* end, - const RootInfo& info) { +inline mirror::Object* MarkCompact::UpdateRoot(mirror::Object** root, + uint8_t* begin, + uint8_t* end, + const RootInfo& info) { mirror::Object* old_ref = *root; if (VerifyRootSingleUpdate(root, old_ref, info)) { mirror::Object* new_ref = PostCompactAddress(old_ref, begin, end); if (old_ref != new_ref) { *root = new_ref; } + return new_ref; } + return nullptr; } template <size_t kAlignment> @@ -362,8 +368,6 @@ inline mirror::Object* MarkCompact::PostCompactAddressUnchecked(mirror::Object* } if (kIsDebugBuild) { mirror::Object* from_ref = GetFromSpaceAddr(old_ref); - DCHECK(live_words_bitmap_->Test(old_ref)) - << "ref=" << old_ref; if (!moving_space_bitmap_->Test(old_ref)) { std::ostringstream oss; Runtime::Current()->GetHeap()->DumpSpaces(oss); diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index a7af756970..cf5b483f53 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -48,6 +48,7 @@ #include "gc/collector_type.h" #include "gc/reference_processor.h" #include "gc/space/bump_pointer_space.h" +#include "gc/space/space-inl.h" #include "gc/task_processor.h" #include "gc/verification-inl.h" #include "jit/jit_code_cache.h" @@ -61,6 +62,7 @@ #ifdef ART_TARGET_ANDROID #include "android-modules-utils/sdk_level.h" #include "com_android_art.h" +#include "com_android_art_flags.h" #endif #ifndef __BIONIC__ @@ -317,6 +319,18 @@ static bool ShouldUseUserfaultfd() { const bool gUseUserfaultfd = ShouldUseUserfaultfd(); const bool gUseReadBarrier = !gUseUserfaultfd; #endif +#ifdef ART_TARGET_ANDROID +bool ShouldUseGenerationalGC() { + if (gUseUserfaultfd && !com::android::art::flags::use_generational_cmc()) { + return false; + } + // Generational GC feature doesn't need a reboot. Any process (like dex2oat) + // can pick a different values than zygote and will be able to execute. + return GetBoolProperty("persist.device_config.runtime_native.use_generational_gc", true); +} +#else +bool ShouldUseGenerationalGC() { return true; } +#endif namespace gc { namespace collector { @@ -438,18 +452,50 @@ size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) { return total; } +YoungMarkCompact::YoungMarkCompact(Heap* heap, MarkCompact* main) + : GarbageCollector(heap, "young concurrent mark compact"), main_collector_(main) { + // Initialize GC metrics. 
+ metrics::ArtMetrics* metrics = GetMetrics(); + gc_time_histogram_ = metrics->YoungGcCollectionTime(); + metrics_gc_count_ = metrics->YoungGcCount(); + metrics_gc_count_delta_ = metrics->YoungGcCountDelta(); + gc_throughput_histogram_ = metrics->YoungGcThroughput(); + gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput(); + gc_throughput_avg_ = metrics->YoungGcThroughputAvg(); + gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg(); + gc_scanned_bytes_ = metrics->YoungGcScannedBytes(); + gc_scanned_bytes_delta_ = metrics->YoungGcScannedBytesDelta(); + gc_freed_bytes_ = metrics->YoungGcFreedBytes(); + gc_freed_bytes_delta_ = metrics->YoungGcFreedBytesDelta(); + gc_duration_ = metrics->YoungGcDuration(); + gc_duration_delta_ = metrics->YoungGcDurationDelta(); + are_metrics_initialized_ = true; +} + +void YoungMarkCompact::RunPhases() { + DCHECK(!main_collector_->young_gen_); + main_collector_->young_gen_ = true; + main_collector_->RunPhases(); + main_collector_->young_gen_ = false; +} + MarkCompact::MarkCompact(Heap* heap) : GarbageCollector(heap, "concurrent mark compact"), gc_barrier_(0), lock_("mark compact lock", kGenericBottomLock), + sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask}, + mid_to_old_promo_bit_vec_(nullptr), bump_pointer_space_(heap->GetBumpPointerSpace()), + post_compact_end_(nullptr), + young_gen_(false), + use_generational_(heap->GetUseGenerational()), + compacting_(false), moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()), moving_space_begin_(bump_pointer_space_->Begin()), moving_space_end_(bump_pointer_space_->Limit()), black_dense_end_(moving_space_begin_), + mid_gen_end_(moving_space_begin_), uffd_(kFdUnused), - sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask}, - compacting_(false), marking_done_(false), uffd_initialized_(false), clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) { @@ -524,7 +570,6 @@ MarkCompact::MarkCompact(Heap* heap) // Initialize GC metrics. metrics::ArtMetrics* metrics = GetMetrics(); - // The mark-compact collector supports only full-heap collections at the moment. gc_time_histogram_ = metrics->FullGcCollectionTime(); metrics_gc_count_ = metrics->FullGcCount(); metrics_gc_count_delta_ = metrics->FullGcCountDelta(); @@ -541,6 +586,12 @@ MarkCompact::MarkCompact(Heap* heap) are_metrics_initialized_ = true; } +void MarkCompact::ResetGenerationalState() { + black_dense_end_ = mid_gen_end_ = moving_space_begin_; + post_compact_end_ = nullptr; + class_after_obj_map_.clear(); +} + void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) { DCHECK_ALIGNED_PARAM(begin, gPageSize); DCHECK_ALIGNED_PARAM(len, gPageSize); @@ -610,7 +661,9 @@ void MarkCompact::MaybeClampGcStructures() { } } -void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) { +void MarkCompact::PrepareForMarking(bool pre_marking) { + static_assert(gc::accounting::CardTable::kCardDirty - 1 == gc::accounting::CardTable::kCardAged); + static_assert(gc::accounting::CardTable::kCardAged - 1 == gc::accounting::CardTable::kCardAged2); TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); accounting::CardTable* const card_table = heap_->GetCardTable(); // immune_spaces_ is emptied in InitializePhase() before marking starts. 
This @@ -642,29 +695,66 @@ void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) { }, /* card modified visitor */ VoidFunctor()); } - } else if (clear_alloc_space_cards) { + } else if (pre_marking) { CHECK(!space->IsZygoteSpace()); CHECK(!space->IsImageSpace()); - // The card-table corresponding to bump-pointer and non-moving space can - // be cleared, because we are going to traverse all the reachable objects - // in these spaces. This card-table will eventually be used to track - // mutations while concurrent marking is going on. - card_table->ClearCardRange(space->Begin(), space->Limit()); + if (young_gen_) { + uint8_t* space_age_end = space->Limit(); + // Age cards in old-gen as they contain old-to-young references. + if (space == bump_pointer_space_) { + DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize); + moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(old_gen_end_), + reinterpret_cast<mirror::Object*>(moving_space_end_)); + // Clear cards in [old_gen_end_, moving_space_end_) as they are not needed. + card_table->ClearCardRange(old_gen_end_, space->Limit()); + space_age_end = old_gen_end_; + } + card_table->ModifyCardsAtomic(space->Begin(), + space_age_end, + AgeCardVisitor(), + /*card modified visitor=*/VoidFunctor()); + } else { + // The card-table corresponding to bump-pointer and non-moving space can + // be cleared, because we are going to traverse all the reachable objects + // in these spaces. This card-table will eventually be used to track + // mutations while concurrent marking is going on. + card_table->ClearCardRange(space->Begin(), space->Limit()); + if (space == bump_pointer_space_) { + moving_space_bitmap_->Clear(); + } + } if (space != bump_pointer_space_) { CHECK_EQ(space, heap_->GetNonMovingSpace()); + if (young_gen_) { + space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap(); + } non_moving_space_ = space; non_moving_space_bitmap_ = space->GetMarkBitmap(); } } else { - card_table->ModifyCardsAtomic( - space->Begin(), - space->End(), - [](uint8_t card) { - return (card == gc::accounting::CardTable::kCardDirty) ? - gc::accounting::CardTable::kCardAged : - gc::accounting::CardTable::kCardClean; - }, - /* card modified visitor */ VoidFunctor()); + if (young_gen_) { + // It would be correct to retain existing aged cards and add dirty cards + // to that set. However, that would unecessarily need us to re-scan + // cards which haven't been dirtied since first-pass of marking. + auto card_visitor = [](uint8_t card) { + return (card > gc::accounting::CardTable::kCardAged2) + ? 
card - 1 + : gc::accounting::CardTable::kCardClean; + }; + card_table->ModifyCardsAtomic( + space->Begin(), space->End(), card_visitor, /*card modified visitor=*/VoidFunctor()); + } else { + card_table->ModifyCardsAtomic(space->Begin(), + space->End(), + AgeCardVisitor(), + /*card modified visitor=*/VoidFunctor()); + } + } + } + if (pre_marking && young_gen_) { + for (const auto& space : GetHeap()->GetDiscontinuousSpaces()) { + CHECK(space->IsLargeObjectSpace()); + space->AsLargeObjectSpace()->CopyLiveToMarked(); } } } @@ -707,13 +797,15 @@ void MarkCompact::InitializePhase() { DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin()); from_space_slide_diff_ = from_space_begin_ - moving_space_begin_; moving_space_end_ = bump_pointer_space_->Limit(); - if (black_dense_end_ > moving_space_begin_) { - moving_space_bitmap_->Clear(); + if (use_generational_ && !young_gen_) { + class_after_obj_map_.clear(); } - black_dense_end_ = moving_space_begin_; // TODO: Would it suffice to read it once in the constructor, which is called // in zygote process? pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + for (size_t i = 0; i < vector_length_; i++) { + DCHECK_EQ(chunk_info_vec_[i], 0u); + } } class MarkCompact::ThreadFlipVisitor : public Closure { @@ -785,6 +877,12 @@ void MarkCompact::RunPhases() { ReaderMutexLock mu(self, *Locks::mutator_lock_); CompactionPhase(); } + } else { + if (use_generational_) { + DCHECK_IMPLIES(post_compact_end_ != nullptr, post_compact_end_ == black_allocations_begin_); + mid_gen_end_ = black_allocations_begin_; + } + post_compact_end_ = black_allocations_begin_; } FinishPhase(); GetHeap()->PostGcVerification(this); @@ -931,6 +1029,41 @@ size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin, return page_idx; } +// Generational CMC description +// ============================ +// +// All allocations since last GC are considered to be in young generation. +// Unlike other ART GCs, we promote surviving objects to old generation after +// they survive two contiguous GCs. Objects that survive one GC are considered +// to be in mid generation. In the next young GC, marking is performed on both +// the young as well as mid gen objects. And then during compaction, the +// surviving mid-gen objects are compacted and then promoted to old-gen, while +// the surviving young gen objects are compacted and promoted to mid-gen. +// +// Some other important points worth explaining: +// +// 1. During marking-phase, 'mid_gen_end_' segregates young and mid generations. +// Before starting compaction, in PrepareForCompaction(), we set it to the +// corresponding post-compact addresses, aligned down to page-size. Therefore, +// some object's beginning portion maybe in old-gen, while the rest is in mid-gen. +// Aligning down is essential as mid_gen_end_ becomes old_gen_end_ at the end of +// GC cycle, and the latter has to be page-aligned as old-gen pages are +// processed differently (no compaction). +// +// 2. We need to maintain the mark-bitmap for the old-gen for subsequent GCs, +// when objects are promoted to old-gen from mid-gen, their mark bits are +// first collected in a BitVector and then later copied into mark-bitmap in +// FinishPhase(). We can't directly set the bits in mark-bitmap as the bitmap +// contains pre-compaction mark bits which are required during compaction. +// +// 3. Since we need to revisit mid-gen objects in the next GC cycle, we need to +// dirty the cards in old-gen containing references to them. 
We identify these +// references when visiting old-gen objects during compaction. However, native +// roots are skipped at that time (they are updated separately in linear-alloc +// space, where we don't know which object (dex-cache/class-loader/class) does +// a native root belong to. Therefore, native roots are covered during marking +// phase. + bool MarkCompact::PrepareForCompaction() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); size_t chunk_info_per_page = gPageSize / kOffsetChunkSize; @@ -938,11 +1071,12 @@ bool MarkCompact::PrepareForCompaction() { DCHECK_LE(vector_len, vector_length_); DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page); if (UNLIKELY(vector_len == 0)) { - // Nothing to compact. + // Nothing to compact. Entire heap is empty. return false; } for (size_t i = 0; i < vector_len; i++) { - DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize); + DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize) + << "i:" << i << " vector_length:" << vector_len << " vector_length_:" << vector_length_; DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i)); } @@ -970,8 +1104,23 @@ bool MarkCompact::PrepareForCompaction() { size_t black_dense_idx = 0; GcCause gc_cause = GetCurrentIteration()->GetGcCause(); - if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition && - !GetCurrentIteration()->GetClearSoftReferences()) { + if (young_gen_) { + DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize); + DCHECK_GE(mid_gen_end_, old_gen_end_); + DCHECK_GE(black_allocations_begin_, mid_gen_end_); + // old-gen's boundary was decided at the end of previous GC-cycle. + black_dense_idx = (old_gen_end_ - moving_space_begin_) / kOffsetChunkSize; + if (black_dense_idx == vector_len) { + // There is nothing live in young-gen. + DCHECK_EQ(old_gen_end_, black_allocations_begin_); + return false; + } + InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_), + reinterpret_cast<uintptr_t>(old_gen_end_), + moving_space_bitmap_, + first_objs_moving_space_); + } else if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition && + !GetCurrentIteration()->GetClearSoftReferences()) { uint64_t live_bytes = 0, total_bytes = 0; size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page); size_t num_pages = aligned_vec_len / chunk_info_per_page; @@ -1020,13 +1169,15 @@ bool MarkCompact::PrepareForCompaction() { black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize; DCHECK_LE(black_dense_idx, vector_len); if (black_dense_idx == vector_len) { - // There is nothing to compact. + // There is nothing to compact. All the in-use pages are completely full. return false; } InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_), reinterpret_cast<uintptr_t>(black_dense_end_), moving_space_bitmap_, first_objs_moving_space_); + } else { + black_dense_end_ = moving_space_begin_; } InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page); @@ -1061,14 +1212,52 @@ bool MarkCompact::PrepareForCompaction() { black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_; // We shouldn't be consuming more space after compaction than pre-compaction. CHECK_GE(black_objs_slide_diff_, 0); + for (size_t i = vector_len; i < vector_length_; i++) { + DCHECK_EQ(chunk_info_vec_[i], 0u); + } if (black_objs_slide_diff_ == 0) { + // Regardless of the gc-type, there are no pages to be compacted. 
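Point by point, the promotion policy described above means an object reaches the old generation only after surviving two consecutive collections: allocations start out young, survivors of one GC sit in the mid generation, and mid-gen survivors of the next GC are promoted to the old generation, whose pages are not compacted. A toy model of just that policy (illustrative only, not ART code):

enum class Generation { kYoung, kMid, kOld };

// Applied to each surviving object at the end of a collection cycle:
// young survivors become mid-gen, mid-gen survivors are promoted to old-gen,
// and old-gen objects stay where they are.
Generation PromoteAfterSurvivingGc(Generation g) {
  switch (g) {
    case Generation::kYoung: return Generation::kMid;
    case Generation::kMid:   return Generation::kOld;
    case Generation::kOld:   return Generation::kOld;
  }
  return g;  // unreachable; keeps compilers quiet
}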
black_dense_end_ = black_allocations_begin_; return false; } - for (size_t i = vector_len; i < vector_length_; i++) { - DCHECK_EQ(chunk_info_vec_[i], 0u); + if (use_generational_) { + // Current value of mid_gen_end_ represents end of 'pre-compacted' mid-gen, + // which was done at of previous GC. Compute, 'post-compacted' end of + // mid-gen, which will be consumed by old-gen at the end of this GC cycle. + DCHECK_NE(mid_gen_end_, nullptr); + mirror::Object* first_obj = nullptr; + if (mid_gen_end_ < black_allocations_begin_) { + ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_); + // Find the first live object in the young-gen. + moving_space_bitmap_->VisitMarkedRange</*kVisitOnce=*/true>( + reinterpret_cast<uintptr_t>(mid_gen_end_), + reinterpret_cast<uintptr_t>(black_allocations_begin_), + [&first_obj](mirror::Object* obj) { first_obj = obj; }); + } + if (first_obj != nullptr) { + if (reinterpret_cast<uint8_t*>(first_obj) >= old_gen_end_) { + // post-compact address of the first live object in young-gen. + first_obj = PostCompactOldObjAddr(first_obj); + DCHECK_LT(reinterpret_cast<uint8_t*>(first_obj), post_compact_end_); + } else { + DCHECK(!young_gen_); + } + // It's important to page-align mid-gen boundary. However, that means + // there could be an object overlapping that boundary. We will deal with + // the consequences of that at different places. Aligning down is + // preferred (over aligning up) to avoid pre-mature promotion of young + // objects to old-gen. + mid_gen_end_ = AlignDown(reinterpret_cast<uint8_t*>(first_obj), gPageSize); + } else { + // Young-gen is empty. + mid_gen_end_ = post_compact_end_; + } + } + if (old_gen_end_ < mid_gen_end_) { + mid_to_old_promo_bit_vec_.reset(new BitVector((mid_gen_end_ - old_gen_end_) / kObjectAlignment, + /*expandable=*/false, + Allocator::GetCallocAllocator())); } - // How do we handle compaction of heap portion used for allocations after the // marking-pause? // All allocations after the marking-pause are considered black (reachable) @@ -1193,30 +1382,48 @@ void MarkCompact::ProcessReferences(Thread* self) { GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings()); } +void MarkCompact::SweepArray(accounting::ObjectStack* obj_arr, bool swap_bitmaps) { + TimingLogger::ScopedTiming t("SweepArray", GetTimings()); + std::vector<space::ContinuousSpace*> sweep_spaces; + for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) { + if (!space->IsAllocSpace() || space == bump_pointer_space_ || + immune_spaces_.ContainsSpace(space) || space->GetLiveBitmap() == nullptr) { + continue; + } + sweep_spaces.push_back(space); + } + GarbageCollector::SweepArray(obj_arr, swap_bitmaps, &sweep_spaces); +} + void MarkCompact::Sweep(bool swap_bitmaps) { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); - // Ensure that nobody inserted objects in the live stack after we swapped the - // stacks. - CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size()); - { - TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings()); - // Mark everything allocated since the last GC as live so that we can sweep - // concurrently, knowing that new allocations won't be marked as live. 
- accounting::ObjectStack* live_stack = heap_->GetLiveStack(); - heap_->MarkAllocStackAsLive(live_stack); - live_stack->Reset(); - DCHECK(mark_stack_->IsEmpty()); - } - for (const auto& space : GetHeap()->GetContinuousSpaces()) { - if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ && - !immune_spaces_.ContainsSpace(space)) { - space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); - DCHECK(!alloc_space->IsZygoteSpace()); - TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings()); - RecordFree(alloc_space->Sweep(swap_bitmaps)); + if (young_gen_) { + // Only sweep objects on the live stack. + SweepArray(heap_->GetLiveStack(), /*swap_bitmaps=*/false); + } else { + // Ensure that nobody inserted objects in the live stack after we swapped the + // stacks. + CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size()); + { + TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings()); + // Mark everything allocated since the last GC as live so that we can sweep + // concurrently, knowing that new allocations won't be marked as live. + accounting::ObjectStack* live_stack = heap_->GetLiveStack(); + heap_->MarkAllocStackAsLive(live_stack); + live_stack->Reset(); + DCHECK(mark_stack_->IsEmpty()); } + for (const auto& space : GetHeap()->GetContinuousSpaces()) { + if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ && + !immune_spaces_.ContainsSpace(space)) { + space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); + DCHECK(!alloc_space->IsZygoteSpace()); + TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings()); + RecordFree(alloc_space->Sweep(swap_bitmaps)); + } + } + SweepLargeObjects(swap_bitmaps); } - SweepLargeObjects(swap_bitmaps); } void MarkCompact::SweepLargeObjects(bool swap_bitmaps) { @@ -1256,22 +1463,43 @@ void MarkCompact::ReclaimPhase() { // We want to avoid checking for every reference if it's within the page or // not. This can be done if we know where in the page the holder object lies. // If it doesn't overlap either boundaries then we can skip the checks. -template <bool kCheckBegin, bool kCheckEnd> +// +// If kDirtyOldToMid = true, then check if the object contains any references +// into young-gen, which will be mid-gen after this GC. This is required +// as we mark and compact mid-gen again in next GC-cycle, and hence cards +// need to be dirtied. Note that even black-allocations (the next young-gen) +// will also have to be checked because the pages are being compacted and hence +// the card corresponding to the compacted page needs to be dirtied. +template <bool kCheckBegin, bool kCheckEnd, bool kDirtyOldToMid> class MarkCompact::RefsUpdateVisitor { public: - explicit RefsUpdateVisitor(MarkCompact* collector, - mirror::Object* obj, - uint8_t* begin, - uint8_t* end) + RefsUpdateVisitor(MarkCompact* collector, + mirror::Object* obj, + uint8_t* begin, + uint8_t* end, + accounting::CardTable* card_table = nullptr, + mirror::Object* card_obj = nullptr) + : RefsUpdateVisitor(collector, obj, begin, end, false) { + DCHECK(!kCheckBegin || begin != nullptr); + DCHECK(!kCheckEnd || end != nullptr); + // We can skip checking each reference for objects whose cards are already dirty. 
+ if (kDirtyOldToMid && card_obj != nullptr) { + dirty_card_ = card_table->IsDirty(card_obj); + } + } + + RefsUpdateVisitor( + MarkCompact* collector, mirror::Object* obj, uint8_t* begin, uint8_t* end, bool dirty_card) : collector_(collector), moving_space_begin_(collector->black_dense_end_), moving_space_end_(collector->moving_space_end_), + young_gen_begin_(collector->mid_gen_end_), obj_(obj), begin_(begin), - end_(end) { - DCHECK(!kCheckBegin || begin != nullptr); - DCHECK(!kCheckEnd || end != nullptr); - } + end_(end), + dirty_card_(dirty_card) {} + + bool ShouldDirtyCard() const { return dirty_card_; } void operator()([[maybe_unused]] mirror::Object* old, MemberOffset offset, @@ -1283,7 +1511,9 @@ class MarkCompact::RefsUpdateVisitor { update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_); } if (update) { - collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_); + mirror::Object* new_ref = + collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_); + CheckShouldDirtyCard(new_ref); } } @@ -1296,7 +1526,9 @@ class MarkCompact::RefsUpdateVisitor { [[maybe_unused]] bool is_static, [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) { - collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_); + mirror::Object* new_ref = + collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_); + CheckShouldDirtyCard(new_ref); } void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const @@ -1310,18 +1542,38 @@ class MarkCompact::RefsUpdateVisitor { void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { - collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_); + mirror::Object* new_ref = collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_); + CheckShouldDirtyCard(new_ref); } private: + inline void CheckShouldDirtyCard(mirror::Object* ref) const { + if (kDirtyOldToMid && !dirty_card_) { + // moving_space_end_ is young-gen's end. + dirty_card_ = reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ && + reinterpret_cast<uint8_t*>(ref) < moving_space_end_; + } + } + MarkCompact* const collector_; uint8_t* const moving_space_begin_; uint8_t* const moving_space_end_; + uint8_t* const young_gen_begin_; mirror::Object* const obj_; uint8_t* const begin_; uint8_t* const end_; + mutable bool dirty_card_; }; +inline void MarkCompact::SetBitForMidToOldPromotion(uint8_t* obj) { + DCHECK(use_generational_); + DCHECK_GE(obj, old_gen_end_); + DCHECK_LT(obj, mid_gen_end_); + // This doesn't need to be atomic as every thread only sets bits in the + // bit_vector words corresponding to the page it is compacting. 
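
The kDirtyOldToMid bookkeeping above boils down to: if the holder's card is already dirty, per-reference checks are skipped entirely; otherwise every updated reference is tested against the young-gen range and the holder's card is dirtied afterwards if any reference landed there. A simplified, standalone version of that accumulation (types and ranges here are illustrative, not the runtime's):

```c++
#include <cstdint>

// Tracks whether the holder object's card must be dirtied after all of its
// reference fields have been updated.
class DirtyCardTracker {
 public:
  DirtyCardTracker(bool card_already_dirty,
                   const uint8_t* young_begin,
                   const uint8_t* young_end)
      : dirty_(card_already_dirty), young_begin_(young_begin), young_end_(young_end) {}

  // Called with the post-compact value of each updated reference.
  void Observe(const uint8_t* new_ref) {
    if (!dirty_) {
      dirty_ = new_ref >= young_begin_ && new_ref < young_end_;
    }
  }

  bool ShouldDirtyCard() const { return dirty_; }

 private:
  bool dirty_;
  const uint8_t* const young_begin_;
  const uint8_t* const young_end_;
};
```
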
+ mid_to_old_promo_bit_vec_->SetBit((obj - old_gen_end_) / kObjectAlignment); +} + bool MarkCompact::IsValidObject(mirror::Object* obj) const { mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) { @@ -1345,7 +1597,9 @@ void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const { << " post_compact_end=" << static_cast<void*>(post_compact_end_) << " pre_compact_klass=" << pre_compact_klass << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_); - CHECK(live_words_bitmap_->Test(pre_compact_klass)); + if (!young_gen_) { + CHECK(live_words_bitmap_->Test(pre_compact_klass)); + } } if (!IsValidObject(ref)) { std::ostringstream oss; @@ -1374,10 +1628,13 @@ void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const { } } +template <bool kSetupForGenerational> void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, + uint8_t* to_space_addr, bool needs_memset_zero) { + DCHECK_ALIGNED_PARAM(to_space_addr, gPageSize); DCHECK(moving_space_bitmap_->Test(obj) && live_words_bitmap_->Test(obj)); DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj @@ -1387,30 +1644,42 @@ void MarkCompact::CompactPage(mirror::Object* obj, << static_cast<void*>(black_allocations_begin_) << " post_compact_addr=" << static_cast<void*>(post_compact_end_); + accounting::CardTable* card_table = heap_->GetCardTable(); uint8_t* const start_addr = addr; + // We need to find the cards in the mid-gen (which is going to be consumed + // into old-gen after this GC) for dirty cards (dirtied after marking-pause and + // until compaction pause) and dirty the corresponding post-compact cards. We + // could have found reference fields while updating them in RefsUpdateVisitor. + // But it will not catch native-roots and hence we need to directly look at the + // pre-compact card-table. + // NOTE: we may get some false-positives if the same address in post-compact + // heap is already allocated as TLAB and has been having write-barrers be + // called. But that is not harmful. + size_t cards_per_page = gPageSize >> accounting::CardTable::kCardShift; + size_t dest_cards = 0; + DCHECK(IsAligned<accounting::CardTable::kCardSize>(gPageSize)); + static_assert(sizeof(dest_cards) * kBitsPerByte >= + kMaxPageSize / accounting::CardTable::kCardSize); // How many distinct live-strides do we have. 
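
The promotion bit-vector indexes objects by their post-compact offset from the old-gen end, in object-alignment units; since each page is compacted by exactly one thread, the bit writes need no atomics. A minimal sketch of the index math, assuming 8-byte object alignment and a plain word array in place of ART's BitVector:

```c++
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kObjectAlignment = 8;  // assumed for this sketch

class PromotionBits {
 public:
  // 'capacity_bytes' is the size of the [old_gen_end, mid_gen_end) range.
  PromotionBits(const uint8_t* old_gen_end, size_t capacity_bytes)
      : old_gen_end_(old_gen_end),
        words_((capacity_bytes / kObjectAlignment + 31) / 32, 0u) {}

  // Marks the post-compact object starting at 'obj' as promoted to old-gen.
  void Set(const uint8_t* obj) {
    size_t bit = static_cast<size_t>(obj - old_gen_end_) / kObjectAlignment;
    words_[bit / 32] |= 1u << (bit % 32);
  }

 private:
  const uint8_t* const old_gen_end_;
  std::vector<uint32_t> words_;
};
```
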
size_t stride_count = 0; uint8_t* last_stride = addr; uint32_t last_stride_begin = 0; - auto verify_obj_callback = [&] (std::ostream& os) { - os << " stride_count=" << stride_count - << " last_stride=" << static_cast<void*>(last_stride) - << " offset=" << offset - << " start_addr=" << static_cast<void*>(start_addr); - }; - obj = GetFromSpaceAddr(obj); + auto verify_obj_callback = [&](std::ostream& os) { + os << " stride_count=" << stride_count << " last_stride=" << static_cast<void*>(last_stride) + << " offset=" << offset << " start_addr=" << static_cast<void*>(start_addr); + }; live_words_bitmap_->VisitLiveStrides( offset, black_allocations_begin_, gPageSize, - [&addr, &last_stride, &stride_count, &last_stride_begin, verify_obj_callback, this]( - uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last) + [&](uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last) REQUIRES_SHARED(Locks::mutator_lock_) { - const size_t stride_in_bytes = stride_size * kAlignment; + size_t stride_in_bytes = stride_size * kAlignment; + size_t stride_begin_bytes = stride_begin * kAlignment; DCHECK_LE(stride_in_bytes, gPageSize); last_stride_begin = stride_begin; DCHECK(IsAligned<kAlignment>(addr)); - memcpy(addr, from_space_begin_ + stride_begin * kAlignment, stride_in_bytes); + memcpy(addr, from_space_begin_ + stride_begin_bytes, stride_in_bytes); if (kIsDebugBuild) { uint8_t* space_begin = bump_pointer_space_->Begin(); // We can interpret the first word of the stride as an @@ -1428,43 +1697,94 @@ void MarkCompact::CompactPage(mirror::Object* obj, } } last_stride = addr; - addr += stride_in_bytes; stride_count++; + if (kSetupForGenerational) { + // Card idx within the gPageSize sized destination page. + size_t dest_card_idx = (addr - start_addr) >> accounting::CardTable::kCardShift; + DCHECK_LT(dest_card_idx, cards_per_page); + // Bytes remaining to fill in the current dest card. + size_t dest_bytes_remaining = accounting::CardTable::kCardSize - + (addr - start_addr) % accounting::CardTable::kCardSize; + // Update 'addr' for next stride before starting to modify + // 'stride_in_bytes' in the loops below. + addr += stride_in_bytes; + // Unconsumed bytes in the current src card. + size_t src_card_bytes = accounting::CardTable::kCardSize - + stride_begin_bytes % accounting::CardTable::kCardSize; + src_card_bytes = std::min(src_card_bytes, stride_in_bytes); + uint8_t* end_card = card_table->CardFromAddr( + moving_space_begin_ + stride_begin_bytes + stride_in_bytes - 1); + for (uint8_t* card = + card_table->CardFromAddr(moving_space_begin_ + stride_begin_bytes); + card <= end_card; + card++) { + if (*card == accounting::CardTable::kCardDirty) { + // If the current src card will contribute to the next dest + // card as well, then dirty the next one too. + size_t val = dest_bytes_remaining < src_card_bytes ? 3 : 1; + dest_cards |= val << dest_card_idx; + } + // Adjust destination card and its remaining bytes for next iteration. 
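
`dest_cards` acts as a per-page bitmap of destination cards that must be dirtied, one bit per card of the page being compacted, which is what the static_assert above pins down: the word has to be wide enough for the largest supported page size divided by the card size. Spelled out with assumed illustrative constants (the runtime uses CardTable::kCardSize and kMaxPageSize):

```c++
#include <cstddef>
#include <cstdint>

constexpr size_t kCardSize = 256;        // illustrative
constexpr size_t kMaxPage = 16 * 1024;   // illustrative upper bound on page size

// One bit per destination card in the page must fit in the bitmask word.
static_assert(sizeof(uint64_t) * 8 >= kMaxPage / kCardSize,
              "dest_cards word too narrow for the largest page size");

constexpr size_t CardsPerPage(size_t page_size) { return page_size / kCardSize; }
constexpr size_t DestCardIndex(size_t offset_in_page) { return offset_in_page / kCardSize; }
```
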
+ if (dest_bytes_remaining <= src_card_bytes) { + dest_bytes_remaining = + accounting::CardTable::kCardSize - (src_card_bytes - dest_bytes_remaining); + dest_card_idx++; + } else { + dest_bytes_remaining -= src_card_bytes; + } + DCHECK_LE(dest_card_idx, cards_per_page); + stride_in_bytes -= src_card_bytes; + src_card_bytes = std::min(accounting::CardTable::kCardSize, stride_in_bytes); + } + } else { + addr += stride_in_bytes; + } }); DCHECK_LT(last_stride, start_addr + gPageSize); DCHECK_GT(stride_count, 0u); size_t obj_size = 0; - uint32_t offset_within_obj = offset * kAlignment - - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_); + uint32_t offset_within_obj = + offset * kAlignment - (reinterpret_cast<uint8_t*>(obj) - moving_space_begin_); // First object if (offset_within_obj > 0) { + bool should_dirty_card; mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj); + mirror::Object* from_obj = GetFromSpaceAddr(obj); + mirror::Object* post_compact_obj = nullptr; + if (kSetupForGenerational) { + post_compact_obj = PostCompactAddress(obj, black_dense_end_, moving_space_end_); + } if (stride_count > 1) { - RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this, - to_ref, - start_addr, - nullptr); - obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( + RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor( + this, to_ref, start_addr, nullptr, card_table, post_compact_obj); + obj_size = + from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>( visitor, MemberOffset(offset_within_obj), MemberOffset(-1)); + should_dirty_card = visitor.ShouldDirtyCard(); } else { - RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this, - to_ref, - start_addr, - start_addr + gPageSize); - obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( - visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj - + gPageSize)); + RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor( + this, to_ref, start_addr, start_addr + gPageSize, card_table, post_compact_obj); + obj_size = + from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>( + visitor, + MemberOffset(offset_within_obj), + MemberOffset(offset_within_obj + gPageSize)); + should_dirty_card = visitor.ShouldDirtyCard(); + } + if (kSetupForGenerational && should_dirty_card) { + card_table->MarkCard(post_compact_obj); } obj_size = RoundUp(obj_size, kAlignment); DCHECK_GT(obj_size, offset_within_obj) - << "obj:" << obj << " class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>() + << "obj:" << obj + << " class:" << from_obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>() << " to_addr:" << to_ref << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_) << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_) << " offset:" << offset * kAlignment << " class-after-obj-iter:" - << (class_after_obj_iter_ != class_after_obj_map_.rend() ? - class_after_obj_iter_->first.AsMirrorPtr() : - nullptr) + << (class_after_obj_iter_ != class_after_obj_map_.rend() + ? 
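
The per-stride loop above walks source cards and sets one or two destination bits per dirty card, depending on whether that card's bytes straddle a destination-card boundary. Functionally it is equivalent to the byte-granular reference version below, which is O(stride length) rather than O(cards) and therefore only a sketch; the card size is again an assumed illustrative constant, and the dirty-card array is indexed from the space begin:

```c++
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kCardSize = 256;  // illustrative

// Returns a bitmask of destination cards (relative to the destination page)
// that must be dirtied because the copied bytes came from dirty source cards.
uint64_t DirtyDestCards(size_t src_offset,    // stride begin, space-relative
                        size_t dest_offset,   // copy position within the dest page
                        size_t stride_bytes,
                        const std::vector<bool>& dirty_src_cards) {
  uint64_t dest_cards = 0;
  for (size_t i = 0; i < stride_bytes; ++i) {
    if (dirty_src_cards[(src_offset + i) / kCardSize]) {
      dest_cards |= uint64_t{1} << ((dest_offset + i) / kCardSize);
    }
  }
  return dest_cards;
}
```
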
class_after_obj_iter_->first.AsMirrorPtr() + : nullptr) << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_) << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_ << " offset-of-last-idx:" @@ -1493,9 +1813,19 @@ void MarkCompact::CompactPage(mirror::Object* obj, while (bytes_to_visit > bytes_done) { mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done); VerifyObject(ref, verify_obj_callback); - RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> - visitor(this, ref, nullptr, nullptr); + RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational> visitor( + this, + ref, + nullptr, + nullptr, + dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift))); obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1)); + if (kSetupForGenerational) { + SetBitForMidToOldPromotion(to_space_addr + bytes_done); + if (visitor.ShouldDirtyCard()) { + card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done)); + } + } obj_size = RoundUp(obj_size, kAlignment); bytes_done += obj_size; } @@ -1511,11 +1841,21 @@ void MarkCompact::CompactPage(mirror::Object* obj, mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done); obj = reinterpret_cast<mirror::Object*>(from_addr); VerifyObject(ref, verify_obj_callback); - RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> - visitor(this, ref, nullptr, start_addr + gPageSize); + RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor( + this, + ref, + nullptr, + start_addr + gPageSize, + dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift))); obj_size = obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(end_addr - (addr + bytes_done))); + if (kSetupForGenerational) { + SetBitForMidToOldPromotion(to_space_addr + bytes_done); + if (visitor.ShouldDirtyCard()) { + card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done)); + } + } obj_size = RoundUp(obj_size, kAlignment); DCHECK_GT(obj_size, 0u) << "from_addr:" << obj @@ -2143,7 +2483,19 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { page, /*map_immediately=*/page == reserve_page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { - CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode); + if (use_generational_ && to_space_end < mid_gen_end_) { + CompactPage</*kSetupForGenerational=*/true>(first_obj, + pre_compact_offset_moving_space_[idx], + page, + to_space_end, + kMode == kCopyMode); + } else { + CompactPage</*kSetupForGenerational=*/false>(first_obj, + pre_compact_offset_moving_space_[idx], + page, + to_space_end, + kMode == kCopyMode); + } }); if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) { // map the pages in the following address as they can't be mapped with the @@ -2169,8 +2521,13 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { to_space_end + from_space_slide_diff_, /*map_immediately=*/false, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { - UpdateNonMovingPage( - first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_); + if (use_generational_) { + UpdateNonMovingPage</*kSetupForGenerational=*/true>( + first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_); + } else { + UpdateNonMovingPage</*kSetupForGenerational=*/false>( + first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_); + } if (kMode == kFallbackMode) 
{ memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize); } @@ -2274,55 +2631,94 @@ size_t MarkCompact::MapMovingSpacePages(size_t start_idx, return arr_len - start_idx; } +template <bool kSetupForGenerational> void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page, ptrdiff_t from_space_diff, accounting::ContinuousSpaceBitmap* bitmap) { DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize); + accounting::CardTable* card_table = heap_->GetCardTable(); + mirror::Object* curr_obj = first; + uint8_t* from_page = page + from_space_diff; + uint8_t* from_page_end = from_page + gPageSize; + uint8_t* scan_begin = + std::max(reinterpret_cast<uint8_t*>(first) + mirror::kObjectHeaderSize, page); // For every object found in the page, visit the previous object. This ensures // that we can visit without checking page-end boundary. // Call VisitRefsForCompaction with from-space read-barrier as the klass object and // super-class loads require it. // TODO: Set kVisitNativeRoots to false once we implement concurrent // compaction - mirror::Object* curr_obj = first; - uint8_t* from_page = page + from_space_diff; - uint8_t* from_page_end = from_page + gPageSize; - bitmap->VisitMarkedRange( - reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize, - reinterpret_cast<uintptr_t>(page + gPageSize), - [&](mirror::Object* next_obj) { - mirror::Object* from_obj = reinterpret_cast<mirror::Object*>( - reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff); - if (reinterpret_cast<uint8_t*>(curr_obj) < page) { - RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false> visitor( - this, from_obj, from_page, from_page_end); - MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj)); - // Native roots shouldn't be visited as they are done when this - // object's beginning was visited in the preceding page. - from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>( - visitor, begin_offset, MemberOffset(-1)); - } else { - RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false> visitor( - this, from_obj, from_page, from_page_end); - from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>( - visitor, MemberOffset(0), MemberOffset(-1)); - } - curr_obj = next_obj; - }); - - mirror::Object* from_obj = - reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff); - MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj)); - if (reinterpret_cast<uint8_t*>(curr_obj) < page) { - RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true> visitor( - this, from_obj, from_page, from_page_end); - from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>( - visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset); + auto obj_visitor = [&](mirror::Object* next_obj) { + if (curr_obj != nullptr) { + mirror::Object* from_obj = + reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff); + bool should_dirty_card; + if (reinterpret_cast<uint8_t*>(curr_obj) < page) { + RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor( + this, from_obj, from_page, from_page_end, card_table, curr_obj); + MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj)); + // Native roots shouldn't be visited as they are done when this + // object's beginning was visited in the preceding page. 
+ from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>( + visitor, begin_offset, MemberOffset(-1)); + should_dirty_card = visitor.ShouldDirtyCard(); + } else { + RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational> + visitor(this, from_obj, from_page, from_page_end, card_table, curr_obj); + from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>( + visitor, MemberOffset(0), MemberOffset(-1)); + should_dirty_card = visitor.ShouldDirtyCard(); + } + if (kSetupForGenerational && should_dirty_card) { + card_table->MarkCard(curr_obj); + } + } + curr_obj = next_obj; + }; + + if (young_gen_) { + DCHECK(bitmap->Test(first)); + // If the first-obj is covered by the same card which also covers the first + // word of the page, then it's important to set curr_obj to nullptr to avoid + // updating the references twice. + if (card_table->IsClean(first) || + card_table->CardFromAddr(first) == card_table->CardFromAddr(scan_begin)) { + curr_obj = nullptr; + } + // We cannot acquire heap-bitmap-lock here as this function is called from + // SIGBUS handler. But it's safe as the bitmap passed to Scan function + // can't get modified until this GC cycle is finished. + FakeMutexLock mu(*Locks::heap_bitmap_lock_); + card_table->Scan</*kClearCard=*/false>( + bitmap, scan_begin, page + gPageSize, obj_visitor, accounting::CardTable::kCardAged2); } else { - RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor( - this, from_obj, from_page, from_page_end); - from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(visitor, MemberOffset(0), end_offset); + bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(scan_begin), + reinterpret_cast<uintptr_t>(page + gPageSize), + obj_visitor); + } + + if (curr_obj != nullptr) { + bool should_dirty_card; + mirror::Object* from_obj = + reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff); + MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj)); + if (reinterpret_cast<uint8_t*>(curr_obj) < page) { + RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor( + this, from_obj, from_page, from_page_end, card_table, curr_obj); + from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>( + visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset); + should_dirty_card = visitor.ShouldDirtyCard(); + } else { + RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor( + this, from_obj, from_page, from_page_end, card_table, curr_obj); + from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>( + visitor, MemberOffset(0), end_offset); + should_dirty_card = visitor.ShouldDirtyCard(); + } + if (kSetupForGenerational && should_dirty_card) { + card_table->MarkCard(curr_obj); + } } } @@ -2340,7 +2736,13 @@ void MarkCompact::UpdateNonMovingSpace() { page -= gPageSize; // null means there are no objects on the page to update references. 
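
In the young-GC path of UpdateNonMovingPage, the straddling first object is dropped from the card scan when its card is clean or identical to the card covering the first word of the page, since the scan callback would otherwise update its references a second time. The card comparison itself is plain shift arithmetic; a sketch with an assumed card shift (the runtime's value is CardTable::kCardShift):

```c++
#include <cstdint>

constexpr unsigned kCardShift = 8;  // assumed for this sketch

inline bool OnSameCard(const void* a, const void* b) {
  return (reinterpret_cast<uintptr_t>(a) >> kCardShift) ==
         (reinterpret_cast<uintptr_t>(b) >> kCardShift);
}

// The scan over [scan_begin, page_end) starts with curr_obj == nullptr when
// either condition holds, so the object spanning the page start is visited
// exactly once.
inline bool SkipFirstObject(bool first_card_clean,
                            const void* first_obj,
                            const void* scan_begin) {
  return first_card_clean || OnSameCard(first_obj, scan_begin);
}
```
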
if (obj != nullptr) { - UpdateNonMovingPage(obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_); + if (use_generational_) { + UpdateNonMovingPage</*kSetupForGenerational=*/true>( + obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_); + } else { + UpdateNonMovingPage</*kSetupForGenerational=*/false>( + obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_); + } } } } @@ -2504,12 +2906,15 @@ void MarkCompact::UpdateNonMovingSpaceBlackAllocations() { accounting::ObjectStack* stack = heap_->GetAllocationStack(); const StackReference<mirror::Object>* limit = stack->End(); uint8_t* const space_begin = non_moving_space_->Begin(); + size_t num_pages = DivideByPageSize(non_moving_space_->Capacity()); for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) { mirror::Object* obj = it->AsMirrorPtr(); if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) { non_moving_space_bitmap_->Set(obj); - // Clear so that we don't try to set the bit again in the next GC-cycle. - it->Clear(); + if (!use_generational_) { + // Clear so that we don't try to set the bit again in the next GC-cycle. + it->Clear(); + } size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin); uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize); mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr(); @@ -2517,7 +2922,10 @@ void MarkCompact::UpdateNonMovingSpaceBlackAllocations() { || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) { first_objs_non_moving_space_[idx].Assign(obj); } - mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr(); + if (++idx == num_pages) { + continue; + } + mirror::Object* next_page_first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr(); uint8_t* next_page_begin = page_begin + gPageSize; if (next_page_first_obj == nullptr || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) { @@ -3114,8 +3522,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, size_t nr_moving_space_used_pages, bool tolerate_enoent) { Thread* self = Thread::Current(); - uint8_t* unused_space_begin = - bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize; + uint8_t* unused_space_begin = moving_space_begin_ + nr_moving_space_used_pages * gPageSize; DCHECK(IsAlignedParam(unused_space_begin, gPageSize)); if (fault_page >= unused_space_begin) { // There is a race which allows more than one thread to install a @@ -3124,7 +3531,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent); return; } - size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin()); + size_t page_idx = DivideByPageSize(fault_page - moving_space_begin_); DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_); mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr(); if (first_obj == nullptr) { @@ -3177,7 +3584,13 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, static_cast<uint8_t>(PageState::kMutatorProcessing), std::memory_order_acquire)) { if (fault_page < black_dense_end_) { - UpdateNonMovingPage(first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_); + if (use_generational_) { + UpdateNonMovingPage</*kSetupForGenerational=*/true>( + first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_); + } else { + 
UpdateNonMovingPage</*kSetupForGenerational=*/false>( + first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_); + } buf = fault_page + from_space_slide_diff_; } else { if (UNLIKELY(buf == nullptr)) { @@ -3191,10 +3604,19 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, if (fault_page < post_compact_end_) { // The page has to be compacted. - CompactPage(first_obj, - pre_compact_offset_moving_space_[page_idx], - buf, - /*needs_memset_zero=*/true); + if (use_generational_ && fault_page < mid_gen_end_) { + CompactPage</*kSetupGenerational=*/true>(first_obj, + pre_compact_offset_moving_space_[page_idx], + buf, + fault_page, + /*needs_memset_zero=*/true); + } else { + CompactPage</*kSetupGenerational=*/false>(first_obj, + pre_compact_offset_moving_space_[page_idx], + buf, + fault_page, + /*needs_memset_zero=*/true); + } } else { DCHECK_NE(first_obj, nullptr); DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); @@ -3848,10 +4270,45 @@ void MarkCompact::UpdateAndMarkModUnion() { } } +void MarkCompact::ScanOldGenObjects() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + // Moving space + card_table->Scan</*kClearCard=*/false>(moving_space_bitmap_, + moving_space_begin_, + old_gen_end_, + ScanObjectVisitor(this), + gc::accounting::CardTable::kCardAged2); + ProcessMarkStack(); + // Non-moving space + card_table->Scan</*kClearCard=*/false>(non_moving_space_bitmap_, + non_moving_space_->Begin(), + non_moving_space_->End(), + ScanObjectVisitor(this), + gc::accounting::CardTable::kCardAged2); + ProcessMarkStack(); +} + void MarkCompact::MarkReachableObjects() { UpdateAndMarkModUnion(); // Recursively mark all the non-image bits set in the mark bitmap. ProcessMarkStack(); + if (young_gen_) { + // For the object overlapping on the old-gen boundary, we need to visit it + // to make sure that we don't miss the references in the mid-gen area, and + // also update the corresponding liveness info. + if (old_gen_end_ > moving_space_begin_) { + uintptr_t old_gen_end = reinterpret_cast<uintptr_t>(old_gen_end_); + mirror::Object* obj = moving_space_bitmap_->FindPrecedingObject(old_gen_end - kAlignment); + if (obj != nullptr) { + size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>(); + if (reinterpret_cast<uintptr_t>(obj) + RoundUp(obj_size, kAlignment) > old_gen_end) { + ScanObject</*kUpdateLiveWords=*/true>(obj); + } + } + } + ScanOldGenObjects(); + } } void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) { @@ -3866,12 +4323,31 @@ void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) { name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects"; break; case space::kGcRetentionPolicyAlwaysCollect: + DCHECK(space == bump_pointer_space_ || space == non_moving_space_); name = paused ? 
"(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects"; break; } TimingLogger::ScopedTiming t(name, GetTimings()); - card_table->Scan</*kClearCard*/ false>( - space->GetMarkBitmap(), space->Begin(), space->End(), ScanObjectVisitor(this), minimum_age); + if (paused && young_gen_ && + space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) { + auto mod_visitor = [](uint8_t* card, uint8_t cur_val) { + DCHECK_EQ(cur_val, accounting::CardTable::kCardDirty); + *card = accounting::CardTable::kCardAged; + }; + + card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + ScanObjectVisitor(this), + mod_visitor, + minimum_age); + } else { + card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + ScanObjectVisitor(this), + minimum_age); + } } } @@ -3898,7 +4374,7 @@ void MarkCompact::PreCleanCards() { // Age the card-table before thread stack scanning checkpoint in MarkRoots() // as it ensures that there are no in-progress write barriers which started // prior to aging the card-table. - PrepareCardTableForMarking(/*clear_alloc_space_cards*/ false); + PrepareForMarking(/*pre_marking=*/false); MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots)); RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1); } @@ -3920,7 +4396,7 @@ void MarkCompact::MarkingPhase() { DCHECK_EQ(thread_running_gc_, Thread::Current()); WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_); MaybeClampGcStructures(); - PrepareCardTableForMarking(/*clear_alloc_space_cards*/ true); + PrepareForMarking(/*pre_marking=*/true); MarkZygoteLargeObjects(); MarkRoots( static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots)); @@ -3941,8 +4417,21 @@ void MarkCompact::MarkingPhase() { class MarkCompact::RefFieldsVisitor { public: - ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact) - : mark_compact_(mark_compact) {} + ALWAYS_INLINE RefFieldsVisitor(MarkCompact* const mark_compact, mirror::Object* obj) + : mark_compact_(mark_compact), + // Conceptually we only need to check for references in [mid_gen_end_, + // moving_space_end_) range, but we enlarge (towards the beginning) it + // because later in PrepareForCompaction() we will align-down the + // mid-gen boundary, which means we may not promote some mid-gen objects + // on the first page to old-gen. 
+ young_gen_begin_(mark_compact->old_gen_end_), + young_gen_end_(mark_compact->moving_space_end_), + dirty_card_(false), + check_refs_to_young_gen_(mark_compact->use_generational_ && + (reinterpret_cast<uint8_t*>(obj) < mark_compact->mid_gen_end_ || + reinterpret_cast<uint8_t*>(obj) >= young_gen_end_)) {} + + bool ShouldDirtyCard() const { return dirty_card_; } ALWAYS_INLINE void operator()(mirror::Object* obj, MemberOffset offset, @@ -3952,7 +4441,8 @@ class MarkCompact::RefFieldsVisitor { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); } - mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset); + mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset); + mark_compact_->MarkObject(ref, obj, offset); } void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE @@ -3974,11 +4464,20 @@ class MarkCompact::RefFieldsVisitor { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); } - mark_compact_->MarkObject(root->AsMirrorPtr()); + mirror::Object* ref = root->AsMirrorPtr(); + mark_compact_->MarkObject(ref); + if (check_refs_to_young_gen_) { + dirty_card_ |= reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ && + reinterpret_cast<uint8_t*>(ref) < young_gen_end_; + } } private: MarkCompact* const mark_compact_; + uint8_t* const young_gen_begin_; + uint8_t* const young_gen_end_; + mutable bool dirty_card_; + const bool check_refs_to_young_gen_; }; template <size_t kAlignment> @@ -3998,19 +4497,24 @@ void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) { UpdateClassAfterObjectMap(obj); size_t size = RoundUp(obj_size, kAlignment); uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size); - size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize; + size_t chunk_idx = + (obj_begin - reinterpret_cast<uintptr_t>(moving_space_begin_)) / kOffsetChunkSize; // Compute the bit-index within the chunk-info vector word. bit_index %= kBitsPerVectorWord; size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment); - - chunk_info_vec_[chunk_idx++] += first_chunk_portion; + chunk_info_vec_[chunk_idx] += first_chunk_portion; + DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize) + << "first_chunk_portion:" << first_chunk_portion + << " obj-size:" << RoundUp(obj_size, kAlignment); + chunk_idx++; DCHECK_LE(first_chunk_portion, size); for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) { DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u); chunk_info_vec_[chunk_idx++] = kOffsetChunkSize; } chunk_info_vec_[chunk_idx] += size; - freed_objects_--; + DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize) + << "size:" << size << " obj-size:" << RoundUp(obj_size, kAlignment); } template <bool kUpdateLiveWords> @@ -4048,12 +4552,25 @@ void MarkCompact::ScanObject(mirror::Object* obj) { size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>(); bytes_scanned_ += obj_size; - RefFieldsVisitor visitor(this); + RefFieldsVisitor visitor(this, obj); DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces(); if (kUpdateLiveWords && HasAddress(obj)) { UpdateLivenessInfo(obj, obj_size); + freed_objects_--; } obj->VisitReferences(visitor, visitor); + // old-gen cards for objects containing references to mid-gen needs to be kept + // dirty for re-scan in the next GC cycle. 
We take care of that majorly during + // compaction-phase as that enables us to implicitly take care of + // black-allocated objects as well. Unfortunately, since we don't visit + // native-roots during compaction, that has to be captured during marking. + // + // Note that we can't dirty the cards right away because then we will wrongly + // age them during re-scan of this marking-phase, and thereby may loose them + // by the end of the GC cycle. + if (visitor.ShouldDirtyCard()) { + dirty_cards_later_vec_.push_back(obj); + } } // Scan anything that's on the mark stack. @@ -4101,6 +4618,12 @@ inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj, // We expect most of the referenes to be in bump-pointer space, so try that // first to keep the cost of this function minimal. if (LIKELY(HasAddress(obj))) { + // If obj is in old-gen (during young-gc) then we shouldn't add it to + // mark-stack to limit marking to young generation. + if (young_gen_ && reinterpret_cast<uint8_t*>(obj) < old_gen_end_) { + DCHECK(moving_space_bitmap_->Test(obj)); + return false; + } return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj) : !moving_space_bitmap_->Set(obj); } else if (non_moving_space_bitmap_->HasAddress(obj)) { @@ -4249,23 +4772,116 @@ void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass, } void MarkCompact::FinishPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); GetCurrentIteration()->SetScannedBytes(bytes_scanned_); bool is_zygote = Runtime::Current()->IsZygote(); compacting_ = false; marking_done_ = false; - ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size()); - info_map_.MadviseDontNeedAndZero(); - live_words_bitmap_->ClearBitmap(); - if (moving_space_begin_ == black_dense_end_) { + uint8_t* mark_bitmap_clear_end = black_dense_end_; + if (use_generational_) { + { + ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); + // We need to retain class-after-object map for old-gen as that won't + // be created in next young-gc. + // + // Find the first class which is getting promoted to old-gen. + auto iter = class_after_obj_map_.lower_bound( + ObjReference::FromMirrorPtr(reinterpret_cast<mirror::Object*>(old_gen_end_))); + while (iter != class_after_obj_map_.end()) { + // As 'mid_gen_end_' is where our old-gen will end now, compute + // compacted addresses of <class, object> for comparisons and updating + // in the map. + mirror::Object* compacted_klass = nullptr; + mirror::Object* compacted_obj = nullptr; + mirror::Object* klass = iter->first.AsMirrorPtr(); + mirror::Object* obj = iter->second.AsMirrorPtr(); + DCHECK_GT(klass, obj); + if (reinterpret_cast<uint8_t*>(klass) < black_allocations_begin_) { + DCHECK(moving_space_bitmap_->Test(klass)); + DCHECK(moving_space_bitmap_->Test(obj)); + compacted_klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_); + compacted_obj = PostCompactAddress(obj, old_gen_end_, moving_space_end_); + DCHECK_GT(compacted_klass, compacted_obj); + } + // An object (and therefore its class as well) after mid-gen will be + // considered again during marking in next GC. So remove all entries + // from this point onwards. 
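
Because native roots are not revisited during compaction, objects whose native roots point into the young range are only recorded during marking, and their post-compact cards are dirtied at the very end of the cycle so the aging re-scan cannot clear them first. The pattern, reduced to a standalone sketch with hypothetical helper names:

```c++
#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical stand-in for the dirty_cards_later_vec_ mechanism.
struct DeferredCardDirtier {
  std::vector<const uint8_t*> pending;  // holders recorded while marking

  void RecordDuringMarking(const uint8_t* holder) { pending.push_back(holder); }

  // Called from the finish phase, after compaction, with an address translator
  // to the post-compact location and a card-marking callback.
  void Flush(const std::function<const uint8_t*(const uint8_t*)>& post_compact,
             const std::function<void(const uint8_t*)>& mark_card) {
    for (const uint8_t* holder : pending) {
      mark_card(post_compact(holder));
    }
    pending.clear();
  }
};
```
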
+ if (compacted_obj == nullptr || reinterpret_cast<uint8_t*>(compacted_obj) >= mid_gen_end_) { + class_after_obj_map_.erase(iter, class_after_obj_map_.end()); + break; + } else if (mid_to_old_promo_bit_vec_.get() != nullptr) { + if (reinterpret_cast<uint8_t*>(compacted_klass) >= old_gen_end_) { + DCHECK(mid_to_old_promo_bit_vec_->IsBitSet( + (reinterpret_cast<uint8_t*>(compacted_obj) - old_gen_end_) / kAlignment)); + } + if (reinterpret_cast<uint8_t*>(compacted_klass) < mid_gen_end_) { + DCHECK(mid_to_old_promo_bit_vec_->IsBitSet( + (reinterpret_cast<uint8_t*>(compacted_klass) - old_gen_end_) / kAlignment)); + } + } + auto nh = class_after_obj_map_.extract(iter++); + nh.key() = ObjReference::FromMirrorPtr(compacted_klass); + nh.mapped() = ObjReference::FromMirrorPtr(compacted_obj); + auto success = class_after_obj_map_.insert(iter, std::move(nh)); + CHECK_EQ(success->first.AsMirrorPtr(), compacted_klass); + } + + // Dirty the cards for objects captured from native-roots during marking-phase. + accounting::CardTable* card_table = heap_->GetCardTable(); + for (auto obj : dirty_cards_later_vec_) { + // Only moving and non-moving spaces are relevant as the remaining + // spaces are all immune-spaces which anyways use card-table. + if (HasAddress(obj) || non_moving_space_->HasAddress(obj)) { + card_table->MarkCard(PostCompactAddress(obj, black_dense_end_, moving_space_end_)); + } + } + } + dirty_cards_later_vec_.clear(); + + // Copy mid-gen bitmap into moving-space's mark-bitmap + if (mid_to_old_promo_bit_vec_.get() != nullptr) { + DCHECK_EQ(mid_to_old_promo_bit_vec_->GetBitSizeOf(), + (mid_gen_end_ - old_gen_end_) / kObjectAlignment); + uint32_t* bitmap_begin = reinterpret_cast<uint32_t*>(moving_space_bitmap_->Begin()); + DCHECK(IsAligned<kObjectAlignment * BitVector::kWordBits>(gPageSize)); + size_t index = (old_gen_end_ - moving_space_begin_) / kObjectAlignment / BitVector::kWordBits; + mid_to_old_promo_bit_vec_->CopyTo(&bitmap_begin[index], + mid_to_old_promo_bit_vec_->GetSizeOf()); + mid_to_old_promo_bit_vec_.release(); + } + // Promote all mid-gen objects to old-gen and young-gen objects to mid-gen + // for next GC cycle. + old_gen_end_ = mid_gen_end_; + mid_gen_end_ = post_compact_end_; + post_compact_end_ = nullptr; + + // Verify (in debug builds) after updating mark-bitmap if class-after-object + // map is correct or not. + for (auto iter : class_after_obj_map_) { + DCHECK(moving_space_bitmap_->Test(iter.second.AsMirrorPtr())); + mirror::Object* klass = iter.first.AsMirrorPtr(); + DCHECK_IMPLIES(!moving_space_bitmap_->Test(klass), + reinterpret_cast<uint8_t*>(klass) >= old_gen_end_); + } + } else { + class_after_obj_map_.clear(); + } + // Black-dense region, which requires bitmap for object-walk, could be larger + // than old-gen. Therefore, until next GC retain the bitmap for entire + // black-dense region. At the beginning of next cycle, we clear [old_gen_end_, + // moving_space_end_). 
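
At the end of the cycle the generation boundaries simply rotate: the mid generation is absorbed into old-gen, the freshly compacted young objects become the next mid generation, and the mark bitmap is retained up to the larger of the pre-promotion black-dense end and the new old-gen end. A minimal sketch of that hand-over:

```c++
#include <algorithm>
#include <cstdint>

struct GenBoundaries {
  uint8_t* old_gen_end;       // shares storage with black_dense_end_ in the collector
  uint8_t* mid_gen_end;
  uint8_t* post_compact_end;  // end of the space compacted this cycle
};

// Mirrors the promotion step in FinishPhase(): mid-gen becomes old-gen and the
// compacted young objects become the next mid-gen.
inline void PromoteGenerations(GenBoundaries& g) {
  g.old_gen_end = g.mid_gen_end;
  g.mid_gen_end = g.post_compact_end;
  g.post_compact_end = nullptr;
}

// The mark bitmap is cleared only beyond whichever boundary is larger.
inline uint8_t* MarkBitmapClearBegin(uint8_t* pre_promotion_black_dense_end,
                                     uint8_t* new_old_gen_end) {
  return std::max(pre_promotion_black_dense_end, new_old_gen_end);
}
```
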
+ mark_bitmap_clear_end = std::max(black_dense_end_, mark_bitmap_clear_end); + DCHECK_ALIGNED_PARAM(mark_bitmap_clear_end, gPageSize); + if (moving_space_begin_ == mark_bitmap_clear_end) { moving_space_bitmap_->Clear(); } else { - DCHECK_LT(moving_space_begin_, black_dense_end_); - DCHECK_LE(black_dense_end_, moving_space_end_); - moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(black_dense_end_), + DCHECK_LT(moving_space_begin_, mark_bitmap_clear_end); + DCHECK_LE(mark_bitmap_clear_end, moving_space_end_); + moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(mark_bitmap_clear_end), reinterpret_cast<mirror::Object*>(moving_space_end_)); } - bump_pointer_space_->SetBlackDenseRegionSize(black_dense_end_ - moving_space_begin_); + bump_pointer_space_->SetBlackDenseRegionSize(mark_bitmap_clear_end - moving_space_begin_); if (UNLIKELY(is_zygote && IsValidFd(uffd_))) { // This unregisters all ranges as a side-effect. @@ -4275,6 +4891,9 @@ void MarkCompact::FinishPhase() { } CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. mark_stack_->Reset(); + ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size()); + info_map_.MadviseDontNeedAndZero(); + live_words_bitmap_->ClearBitmap(); DCHECK_EQ(thread_running_gc_, Thread::Current()); if (kIsDebugBuild) { MutexLock mu(thread_running_gc_, lock_); @@ -4282,12 +4901,24 @@ void MarkCompact::FinishPhase() { updated_roots_->clear(); } } - class_after_obj_map_.clear(); linear_alloc_arenas_.clear(); { ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); heap_->ClearMarkedObjects(); + if (use_generational_) { + // Clear the bits set temporarily for black allocations in non-moving + // space in UpdateNonMovingSpaceBlackAllocations() so that objects are + // considered for GC in next cycle. + accounting::ObjectStack* stack = heap_->GetAllocationStack(); + const StackReference<mirror::Object>* limit = stack->End(); + for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) { + mirror::Object* obj = it->AsMirrorPtr(); + if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) { + non_moving_space_bitmap_->Clear(obj); + } + } + } } GcVisitedArenaPool* arena_pool = static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index dd9fefb2a9..0f15f52b58 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -25,6 +25,7 @@ #include "barrier.h" #include "base/atomic.h" +#include "base/bit_vector.h" #include "base/gc_visited_arena_pool.h" #include "base/macros.h" #include "base/mutex.h" @@ -53,6 +54,64 @@ class BumpPointerSpace; } // namespace space namespace collector { +class MarkCompact; + +// The actual young GC code is also implemented in MarkCompact class. However, +// using this class saves us from creating duplicate data-structures, which +// would have happened with two instances of MarkCompact. +class YoungMarkCompact final : public GarbageCollector { + public: + YoungMarkCompact(Heap* heap, MarkCompact* main); + + void RunPhases() override REQUIRES(!Locks::mutator_lock_); + + GcType GetGcType() const override { return kGcTypeSticky; } + + CollectorType GetCollectorType() const override { return kCollectorTypeCMC; } + + // None of the following methods are ever called as actual GC is performed by MarkCompact. 
+ + mirror::Object* MarkObject([[maybe_unused]] mirror::Object* obj) override { + UNIMPLEMENTED(FATAL); + UNREACHABLE(); + } + void MarkHeapReference([[maybe_unused]] mirror::HeapReference<mirror::Object>* obj, + [[maybe_unused]] bool do_atomic_update) override { + UNIMPLEMENTED(FATAL); + } + void VisitRoots([[maybe_unused]] mirror::Object*** roots, + [[maybe_unused]] size_t count, + [[maybe_unused]] const RootInfo& info) override { + UNIMPLEMENTED(FATAL); + } + void VisitRoots([[maybe_unused]] mirror::CompressedReference<mirror::Object>** roots, + [[maybe_unused]] size_t count, + [[maybe_unused]] const RootInfo& info) override { + UNIMPLEMENTED(FATAL); + } + bool IsNullOrMarkedHeapReference([[maybe_unused]] mirror::HeapReference<mirror::Object>* obj, + [[maybe_unused]] bool do_atomic_update) override { + UNIMPLEMENTED(FATAL); + UNREACHABLE(); + } + void RevokeAllThreadLocalBuffers() override { UNIMPLEMENTED(FATAL); } + + void DelayReferenceReferent([[maybe_unused]] ObjPtr<mirror::Class> klass, + [[maybe_unused]] ObjPtr<mirror::Reference> reference) override { + UNIMPLEMENTED(FATAL); + } + mirror::Object* IsMarked([[maybe_unused]] mirror::Object* obj) override { + UNIMPLEMENTED(FATAL); + UNREACHABLE(); + } + void ProcessMarkStack() override { UNIMPLEMENTED(FATAL); } + + private: + MarkCompact* const main_collector_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(YoungMarkCompact); +}; + class MarkCompact final : public GarbageCollector { public: using SigbusCounterType = uint32_t; @@ -83,9 +142,7 @@ class MarkCompact final : public GarbageCollector { // is asserted in the function. bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS; - GcType GetGcType() const override { - return kGcTypeFull; - } + GcType GetGcType() const override { return kGcTypePartial; } CollectorType GetCollectorType() const override { return kCollectorTypeCMC; @@ -149,6 +206,11 @@ class MarkCompact final : public GarbageCollector { // GcVisitedArenaPool, which mostly happens only once. void AddLinearAllocSpaceData(uint8_t* begin, size_t len); + // Called by Heap::PreZygoteFork() to reset generational heap pointers and + // other data structures as the moving space gets completely evicted into new + // zygote-space. + void ResetGenerationalState(); + // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as // it's given that processing thread also copies the page, thereby mapping it. // The order is important as we may treat them as integers. Also @@ -171,6 +233,8 @@ class MarkCompact final : public GarbageCollector { kClampInfoFinished }; + friend void YoungMarkCompact::RunPhases(); + private: using ObjReference = mirror::CompressedReference<mirror::Object>; static constexpr uint32_t kPageStateMask = (1 << BitSizeOf<uint8_t>()) - 1; @@ -273,12 +337,14 @@ class MarkCompact final : public GarbageCollector { void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); - // Update the reference at given offset in the given object with post-compact - // address. [begin, end) is moving-space range. - ALWAYS_INLINE void UpdateRef(mirror::Object* obj, - MemberOffset offset, - uint8_t* begin, - uint8_t* end) REQUIRES_SHARED(Locks::mutator_lock_); + // Update the reference at 'offset' in 'obj' with post-compact address, and + // return the new address. [begin, end) is a range in which compaction is + // happening. 
So post-compact address needs to be computed only for + // pre-compact references in this range. + ALWAYS_INLINE mirror::Object* UpdateRef(mirror::Object* obj, + MemberOffset offset, + uint8_t* begin, + uint8_t* end) REQUIRES_SHARED(Locks::mutator_lock_); // Verify that the gc-root is updated only once. Returns false if the update // shouldn't be done. @@ -286,20 +352,22 @@ class MarkCompact final : public GarbageCollector { mirror::Object* old_ref, const RootInfo& info) REQUIRES_SHARED(Locks::mutator_lock_); - // Update the given root with post-compact address. [begin, end) is - // moving-space range. - ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root, - uint8_t* begin, - uint8_t* end, - const RootInfo& info = RootInfo(RootType::kRootUnknown)) + // Update the given root with post-compact address and return the new address. [begin, end) + // is a range in which compaction is happening. So post-compact address needs to be computed + // only for pre-compact references in this range. + ALWAYS_INLINE mirror::Object* UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + uint8_t* begin, + uint8_t* end, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) REQUIRES_SHARED(Locks::mutator_lock_); - ALWAYS_INLINE void UpdateRoot(mirror::Object** root, - uint8_t* begin, - uint8_t* end, - const RootInfo& info = RootInfo(RootType::kRootUnknown)) + ALWAYS_INLINE mirror::Object* UpdateRoot(mirror::Object** root, + uint8_t* begin, + uint8_t* end, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) REQUIRES_SHARED(Locks::mutator_lock_); - // Given the pre-compact address, the function returns the post-compact - // address of the given object. [begin, end) is moving-space range. + // If the given pre-compact address (old_ref) is in [begin, end) range of moving-space, + // then the function returns the computed post-compact address. Otherwise, 'old_ref' is + // returned. ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref, uint8_t* begin, uint8_t* end) const @@ -318,8 +386,8 @@ class MarkCompact final : public GarbageCollector { REQUIRES_SHARED(Locks::mutator_lock_); // Clears (for alloc spaces in the beginning of marking phase) or ages the // card table. Also, identifies immune spaces and mark bitmap. - void PrepareCardTableForMarking(bool clear_alloc_space_cards) - REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); + void PrepareForMarking(bool pre_marking) REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); // Perform one last round of marking, identifying roots from dirty cards // during a stop-the-world (STW) pause. @@ -333,15 +401,20 @@ class MarkCompact final : public GarbageCollector { // during concurrent compaction. Also determines a black-dense region at the // beginning of the moving space which is not compacted. Returns false if // performing compaction isn't required. - bool PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_); + bool PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Locks::heap_bitmap_lock_); // Copy gPageSize live bytes starting from 'offset' (within the moving space), // which must be within 'obj', into the gPageSize sized memory pointed by 'addr'. // Then update the references within the copied objects. The boundary objects are // partially updated such that only the references that lie in the page are updated. // This is necessary to avoid cascading userfaults. 
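
The reworked UpdateRef/UpdateRoot contracts return the post-compact reference so callers can immediately test it against the young-gen range; translation applies only when the old value lies inside the compaction range, and anything else passes through unchanged. A simplified slot-update sketch (the real relocation consults the live-words bitmap; the constant slide below is assumed purely for illustration):

```c++
#include <cstddef>
#include <cstdint>

// Updates one reference slot and records whether the new value points into
// the young generation, so the caller can decide to dirty the holder's card.
inline uint8_t* UpdateSlot(uint8_t** slot,
                           const uint8_t* range_begin, const uint8_t* range_end,
                           ptrdiff_t slide,  // stand-in for the real relocation
                           const uint8_t* young_begin, const uint8_t* young_end,
                           bool* should_dirty_card) {
  uint8_t* ref = *slot;
  if (ref >= range_begin && ref < range_end) {
    ref -= slide;  // objects move toward the space begin during compaction
    *slot = ref;
  }
  *should_dirty_card |= (ref >= young_begin && ref < young_end);
  return ref;
}
```
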
- void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero) - REQUIRES_SHARED(Locks::mutator_lock_); + template <bool kSetupForGenerational> + void CompactPage(mirror::Object* obj, + uint32_t offset, + uint8_t* addr, + uint8_t* to_space_addr, + bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_); // Compact the bump-pointer space. Pass page that should be used as buffer for // userfaultfd. template <int kMode> @@ -359,6 +432,7 @@ class MarkCompact final : public GarbageCollector { // Update all the objects in the given non-moving page. 'first' object // could have started in some preceding page. + template <bool kSetupForGenerational> void UpdateNonMovingPage(mirror::Object* first, uint8_t* page, ptrdiff_t from_space_diff, @@ -595,27 +669,34 @@ class MarkCompact final : public GarbageCollector { void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) REQUIRES_SHARED(Locks::mutator_lock_); + void SweepArray(accounting::ObjectStack* obj_arr, bool swap_bitmaps) + REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); + + // Set bit corresponding to 'obj' in 'mid_to_old_promo_bit_vec_' bit-vector. + // 'obj' is the post-compacted object in mid-gen, which will get promoted to + // old-gen and hence 'mid_to_old_promo_bit_vec_' is copied into mark-bitmap at + // the end of GC for next GC cycle. + void SetBitForMidToOldPromotion(uint8_t* obj); + // Scan old-gen for young GCs by looking for cards that are at least 'aged' in + // the card-table corresponding to moving and non-moving spaces. + void ScanOldGenObjects() REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_); + // For checkpoints Barrier gc_barrier_; - // Every object inside the immune spaces is assumed to be marked. - ImmuneSpaces immune_spaces_; // Required only when mark-stack is accessed in shared mode, which happens // when collecting thread-stack roots using checkpoint. Otherwise, we use it // to synchronize on updated_roots_ in debug-builds. Mutex lock_; - accounting::ObjectStack* mark_stack_; - // Special bitmap wherein all the bits corresponding to an object are set. - // TODO: make LiveWordsBitmap encapsulated in this class rather than a - // pointer. We tend to access its members in performance-sensitive - // code-path. Also, use a single MemMap for all the GC's data structures, - // which we will clear in the end. This would help in limiting the number of - // VMAs that get created in the kernel. - std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_; - // Track GC-roots updated so far in a GC-cycle. This is to confirm that no - // GC-root is updated twice. - // TODO: Must be replaced with an efficient mechanism eventually. Or ensure - // that double updation doesn't happen in the first place. - std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_); + // Counters to synchronize mutator threads and gc-thread at the end of + // compaction. Counter 0 represents the number of mutators still working on + // moving space pages which started before gc-thread finished compacting pages, + // whereas the counter 1 represents those which started afterwards but + // before unregistering the space from uffd. Once counter 1 reaches 0, the + // gc-thread madvises spaces and data structures like page-status array. + // Both the counters are set to 0 before compaction begins. 
They are or'ed + // with kSigbusCounterCompactionDoneMask one-by-one by gc-thread after + // compaction to communicate the status to future mutators. + std::atomic<SigbusCounterType> sigbus_in_progress_count_[2]; MemMap from_space_map_; // Any array of live-bytes in logical chunks of kOffsetChunkSize size // in the 'to-be-compacted' space. @@ -668,6 +749,31 @@ class MarkCompact final : public GarbageCollector { // either at the pair whose class is lower than the first page to be freed, or at the // pair whose object is not yet compacted. ClassAfterObjectMap::const_reverse_iterator class_after_obj_iter_; + // Every object inside the immune spaces is assumed to be marked. + ImmuneSpaces immune_spaces_; + // Bit-vector to store bits for objects which are promoted from mid-gen to + // old-gen during compaction. Later in FinishPhase() it's copied into + // mark-bitmap of moving-space. + std::unique_ptr<BitVector> mid_to_old_promo_bit_vec_; + + // List of objects found to have native gc-roots into young-gen during + // marking. Cards corresponding to these objects are dirtied at the end of GC. + // These have to be captured during marking phase as we don't update + // native-roots during compaction. + std::vector<mirror::Object*> dirty_cards_later_vec_; + space::ContinuousSpace* non_moving_space_; + space::BumpPointerSpace* const bump_pointer_space_; + Thread* thread_running_gc_; + // Length of 'chunk_info_vec_' vector (defined below). + size_t vector_length_; + size_t live_stack_freeze_size_; + size_t non_moving_first_objs_count_; + // Length of first_objs_moving_space_ and pre_compact_offset_moving_space_ + // arrays. Also the number of pages which are to be compacted. + size_t moving_first_objs_count_; + // Number of pages containing black-allocated objects, indicating number of + // pages to be slid. + size_t black_page_count_; // Used by FreeFromSpacePages() for maintaining markers in the moving space for // how far the pages have been reclaimed (madvised) and checked. // @@ -688,30 +794,13 @@ class MarkCompact final : public GarbageCollector { // compacted contents for batching. uint8_t* cur_reclaimable_page_; - space::ContinuousSpace* non_moving_space_; - space::BumpPointerSpace* const bump_pointer_space_; - // The main space bitmap - accounting::ContinuousSpaceBitmap* const moving_space_bitmap_; + // Mark bits for non-moving space accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; - Thread* thread_running_gc_; // Array of moving-space's pages' compaction status, which is stored in the // least-significant byte. kProcessed entries also contain the from-space // offset of the page which contains the compacted contents of the ith // to-space page. Atomic<uint32_t>* moving_pages_status_; - size_t vector_length_; - size_t live_stack_freeze_size_; - - uint64_t bytes_scanned_; - - // For every page in the to-space (post-compact heap) we need to know the - // first object from which we must compact and/or update references. This is - // for both non-moving and moving space. Additionally, for the moving-space, - // we also need the offset within the object from where we need to start - // copying. - // chunk_info_vec_ holds live bytes for chunks during marking phase. After - // marking we perform an exclusive scan to compute offset for every chunk. - uint32_t* chunk_info_vec_; // For pages before black allocations, pre_compact_offset_moving_space_[i] // holds offset within the space from where the objects need to be copied in // the ith post-compact page. 
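The refreshed comments in this header (UpdateRef, UpdateRoot, PostCompactAddress) all describe the same contract: a reference is translated to its post-compact address only if its pre-compact address lies inside the compacting range [begin, end); anything outside that range (immune spaces, the non-moving space, the black-dense/old-gen region) is returned unchanged. A minimal illustrative sketch of that range check follows; it is not ART's implementation, and the 'translate' callable is a placeholder for the live-words-bitmap/chunk-offset arithmetic the real collector uses.

// Illustrative sketch only; 'Translate' stands in for the real address arithmetic.
#include <cstdint>

struct Object {};  // Stand-in for mirror::Object.

template <typename Translate>
inline Object* PostCompactAddressSketch(Object* old_ref,
                                        uint8_t* begin,
                                        uint8_t* end,
                                        Translate&& translate) {
  // Only references inside the compacting range [begin, end) get a new
  // address; everything else is returned as-is.
  uint8_t* raw = reinterpret_cast<uint8_t*>(old_ref);
  return (raw >= begin && raw < end) ? translate(old_ref) : old_ref;
}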
@@ -727,70 +816,97 @@ class MarkCompact final : public GarbageCollector { // First object for every page. It could be greater than the page's start // address, or null if the page is empty. ObjReference* first_objs_non_moving_space_; - size_t non_moving_first_objs_count_; - // Length of first_objs_moving_space_ and pre_compact_offset_moving_space_ - // arrays. Also the number of pages which are to be compacted. - size_t moving_first_objs_count_; - // Number of pages containing black-allocated objects, indicating number of - // pages to be slid. - size_t black_page_count_; + // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can + // compute from-space address of a given pre-comapct address efficiently. + ptrdiff_t from_space_slide_diff_; uint8_t* from_space_begin_; + + // The moving space markers are ordered as follows: + // [moving_space_begin_, black_dense_end_, mid_gen_end_, post_compact_end_, moving_space_end_) + + // End of compacted space. Used for computing post-compact address of black + // allocated objects. Aligned up to page size. + uint8_t* post_compact_end_; + + // BEGIN HOT FIELDS: accessed per object + + accounting::ObjectStack* mark_stack_; + uint64_t bytes_scanned_; + // Number of objects freed during this GC in moving space. It is decremented + // every time an object is discovered. And total-object count is added to it + // in MarkingPause(). It reaches the correct count only once the marking phase + // is completed. + int32_t freed_objects_; + // Set to true when doing young gen collection. + bool young_gen_; + const bool use_generational_; + // True while compacting. + bool compacting_; + // Mark bits for main space + accounting::ContinuousSpaceBitmap* const moving_space_bitmap_; // Cached values of moving-space range to optimize checking if reference - // belongs to moving-space or not. May get updated if and when heap is - // clamped. + // belongs to moving-space or not. May get updated if and when heap is clamped. uint8_t* const moving_space_begin_; uint8_t* moving_space_end_; + // In generational-mode, we maintain 3 generations: young, mid, and old. + // Mid generation is collected during young collections. This means objects + // need to survive two GCs before they get promoted to old-gen. This helps + // in avoiding pre-mature promotion of objects which are allocated just + // prior to a young collection but are short-lived. + // Set to moving_space_begin_ if compacting the entire moving space. // Otherwise, set to a page-aligned address such that [moving_space_begin_, // black_dense_end_) is considered to be densely populated with reachable - // objects and hence is not compacted. - uint8_t* black_dense_end_; + // objects and hence is not compacted. In generational mode, old-gen is + // treated just like black-dense region. + union { + uint8_t* black_dense_end_; + uint8_t* old_gen_end_; + }; + // Prior to compaction, 'mid_gen_end_' represents end of 'pre-compacted' + // mid-gen. During compaction, it represents 'post-compacted' end of mid-gen. + // This is done in PrepareForCompaction(). At the end of GC, in FinishPhase(), + // mid-gen gets consumed/promoted to old-gen, and young-gen becomes mid-gen, + // in preparation for the next GC cycle. + uint8_t* mid_gen_end_; + + // BEGIN HOT FIELDS: accessed per reference update + + // Special bitmap wherein all the bits corresponding to an object are set. + // TODO: make LiveWordsBitmap encapsulated in this class rather than a + // pointer. 
We tend to access its members in performance-sensitive + // code-path. Also, use a single MemMap for all the GC's data structures, + // which we will clear in the end. This would help in limiting the number of + // VMAs that get created in the kernel. + std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_; + // For every page in the to-space (post-compact heap) we need to know the + // first object from which we must compact and/or update references. This is + // for both non-moving and moving space. Additionally, for the moving-space, + // we also need the offset within the object from where we need to start + // copying. + // chunk_info_vec_ holds live bytes for chunks during marking phase. After + // marking we perform an exclusive scan to compute offset for every chunk. + uint32_t* chunk_info_vec_; // moving-space's end pointer at the marking pause. All allocations beyond // this will be considered black in the current GC cycle. Aligned up to page // size. uint8_t* black_allocations_begin_; - // End of compacted space. Use for computing post-compact addr of black - // allocated objects. Aligned up to page size. - uint8_t* post_compact_end_; // Cache (black_allocations_begin_ - post_compact_end_) for post-compact // address computations. ptrdiff_t black_objs_slide_diff_; - // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can - // compute from-space address of a given pre-comapct addr efficiently. - ptrdiff_t from_space_slide_diff_; - // TODO: Remove once an efficient mechanism to deal with double root updation - // is incorporated. - void* stack_high_addr_; - void* stack_low_addr_; + // END HOT FIELDS: accessed per reference update + // END HOT FIELDS: accessed per object uint8_t* conc_compaction_termination_page_; - PointerSize pointer_size_; - // Number of objects freed during this GC in moving space. It is decremented - // every time an object is discovered. And total-object count is added to it - // in MarkingPause(). It reaches the correct count only once the marking phase - // is completed. - int32_t freed_objects_; // Userfault file descriptor, accessed only by the GC itself. // kFallbackMode value indicates that we are in the fallback mode. int uffd_; - // Counters to synchronize mutator threads and gc-thread at the end of - // compaction. Counter 0 represents the number of mutators still working on - // moving space pages which started before gc-thread finished compacting pages, - // whereas the counter 1 represents those which started afterwards but - // before unregistering the space from uffd. Once counter 1 reaches 0, the - // gc-thread madvises spaces and data structures like page-status array. - // Both the counters are set to 0 before compaction begins. They are or'ed - // with kSigbusCounterCompactionDoneMask one-by-one by gc-thread after - // compaction to communicate the status to future mutators. - std::atomic<SigbusCounterType> sigbus_in_progress_count_[2]; // When using SIGBUS feature, this counter is used by mutators to claim a page // out of compaction buffers to be used for the entire compaction cycle. std::atomic<uint16_t> compaction_buffer_counter_; - // True while compacting. - bool compacting_; // Set to true in MarkingPause() to indicate when allocation_stack_ should be // checked in IsMarked() for black allocations. bool marking_done_; @@ -805,6 +921,16 @@ class MarkCompact final : public GarbageCollector { // is also clamped, then we set it to 'Finished'. 
ClampInfoStatus clamp_info_map_status_; + // Track GC-roots updated so far in a GC-cycle. This is to confirm that no + // GC-root is updated twice. + // TODO: Must be replaced with an efficient mechanism eventually. Or ensure + // that double updation doesn't happen in the first place. + std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_); + // TODO: Remove once an efficient mechanism to deal with double root updation + // is incorporated. + void* stack_high_addr_; + void* stack_low_addr_; + class FlipCallback; class ThreadFlipVisitor; class VerifyRootMarkedVisitor; @@ -813,7 +939,8 @@ class MarkCompact final : public GarbageCollector { template <size_t kBufferSize> class ThreadRootsVisitor; class RefFieldsVisitor; - template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor; + template <bool kCheckBegin, bool kCheckEnd, bool kDirtyOldToMid = false> + class RefsUpdateVisitor; class ArenaPoolPageUpdater; class ClassLoaderRootsUpdater; class LinearAllocPageUpdater; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 5ceeeb75af..531cbcf97b 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -149,11 +149,12 @@ DEFINE_RUNTIME_DEBUG_FLAG(Heap, kStressCollectorTransition); // Minimum amount of remaining bytes before a concurrent GC is triggered. static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB; static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB; -// Sticky GC throughput adjustment, divided by 4. Increasing this causes sticky GC to occur more -// relative to partial/full GC. This may be desirable since sticky GCs interfere less with mutator -// threads (lower pauses, use less memory bandwidth). -static double GetStickyGcThroughputAdjustment(bool use_generational_cc) { - return use_generational_cc ? 0.5 : 1.0; +// Sticky GC throughput adjustment. Increasing this causes sticky GC to occur more +// relative to partial/full GC. This may be desirable since sticky GCs interfere less +// with mutator threads (lower pauses, use less memory bandwidth). The value +// (1.0) for non-generational GC case is fixed and shall never change. +static double GetStickyGcThroughputAdjustment(bool use_generational_gc) { + return use_generational_gc ? 0.5 : 1.0; } // Whether or not we compact the zygote in PreZygoteFork. static constexpr bool kCompactZygote = kMovingCollector; @@ -307,7 +308,7 @@ Heap::Heap(size_t initial_size, bool gc_stress_mode, bool measure_gc_performance, bool use_homogeneous_space_compaction_for_oom, - bool use_generational_cc, + bool use_generational_gc, uint64_t min_interval_homogeneous_space_compaction_by_oom, bool dump_region_info_before_gc, bool dump_region_info_after_gc) @@ -375,11 +376,10 @@ Heap::Heap(size_t initial_size, * verification is enabled, we limit the size of allocation stacks to speed up their * searching. */ - max_allocation_stack_size_(kGCALotMode - ? kGcAlotAllocationStackSize - : (kVerifyObjectSupport > kVerifyObjectModeFast) - ? kVerifyObjectAllocationStackSize - : kDefaultAllocationStackSize), + max_allocation_stack_size_(kGCALotMode ? kGcAlotAllocationStackSize + : (kVerifyObjectSupport > kVerifyObjectModeFast) + ? 
kVerifyObjectAllocationStackSize + : kDefaultAllocationStackSize), current_allocator_(kAllocatorTypeDlMalloc), current_non_moving_allocator_(kAllocatorTypeNonMoving), bump_pointer_space_(nullptr), @@ -408,7 +408,7 @@ Heap::Heap(size_t initial_size, pending_collector_transition_(nullptr), pending_heap_trim_(nullptr), use_homogeneous_space_compaction_for_oom_(use_homogeneous_space_compaction_for_oom), - use_generational_cc_(use_generational_cc), + use_generational_gc_(use_generational_gc), running_collection_is_blocking_(false), blocking_gc_count_(0U), blocking_gc_time_(0U), @@ -652,7 +652,7 @@ Heap::Heap(size_t initial_size, space::RegionSpace::CreateMemMap(kRegionSpaceName, capacity_ * 2, request_begin); CHECK(region_space_mem_map.IsValid()) << "No region space mem map"; region_space_ = space::RegionSpace::Create( - kRegionSpaceName, std::move(region_space_mem_map), use_generational_cc_); + kRegionSpaceName, std::move(region_space_mem_map), use_generational_gc_); AddSpace(region_space_); } else if (IsMovingGc(foreground_collector_type_)) { // Create bump pointer spaces. @@ -778,9 +778,50 @@ Heap::Heap(size_t initial_size, concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); } CHECK_NE(target_footprint_.load(std::memory_order_relaxed), 0U); - // Create our garbage collectors. + CreateGarbageCollectors(measure_gc_performance); + if (!GetBootImageSpaces().empty() && non_moving_space_ != nullptr && + (is_zygote || separate_non_moving_space)) { + // Check that there's no gap between the image space and the non moving space so that the + // immune region won't break (eg. due to a large object allocated in the gap). This is only + // required when we're the zygote. + // Space with smallest Begin(). + space::ImageSpace* first_space = nullptr; + for (space::ImageSpace* space : boot_image_spaces_) { + if (first_space == nullptr || space->Begin() < first_space->Begin()) { + first_space = space; + } + } + bool no_gap = MemMap::CheckNoGaps(*first_space->GetMemMap(), *non_moving_space_->GetMemMap()); + if (!no_gap) { + PrintFileToLog("/proc/self/maps", LogSeverity::ERROR); + MemMap::DumpMaps(LOG_STREAM(ERROR), /* terse= */ true); + LOG(FATAL) << "There's a gap between the image space and the non-moving space"; + } + } + // Perfetto Java Heap Profiler Support. + if (runtime->IsPerfettoJavaHeapStackProfEnabled()) { + // Perfetto Plugin is loaded and enabled, initialize the Java Heap Profiler. + InitPerfettoJavaHeapProf(); + } else { + // Disable the Java Heap Profiler. 
+ GetHeapSampler().DisableHeapSampler(); + } + + instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation(); + if (gc_stress_mode_) { + backtrace_lock_ = new Mutex("GC complete lock"); + } + if (is_running_on_memory_tool_ || gc_stress_mode_) { + instrumentation->InstrumentQuickAllocEntryPoints(); + } + if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { + LOG(INFO) << "Heap() exiting"; + } +} + +void Heap::CreateGarbageCollectors(bool measure_gc_performance) { for (size_t i = 0; i < 2; ++i) { - const bool concurrent = i != 0; + const bool concurrent = (i != 0); if ((MayUseCollector(kCollectorTypeCMS) && concurrent) || (MayUseCollector(kCollectorTypeMS) && !concurrent)) { garbage_collectors_.push_back(new collector::MarkSweep(this, concurrent)); @@ -798,75 +839,41 @@ Heap::Heap(size_t initial_size, if (MayUseCollector(kCollectorTypeCMC)) { mark_compact_ = new collector::MarkCompact(this); garbage_collectors_.push_back(mark_compact_); + if (use_generational_gc_) { + young_mark_compact_ = new collector::YoungMarkCompact(this, mark_compact_); + garbage_collectors_.push_back(young_mark_compact_); + } } if (MayUseCollector(kCollectorTypeCC)) { concurrent_copying_collector_ = new collector::ConcurrentCopying(this, /*young_gen=*/false, - use_generational_cc_, + use_generational_gc_, "", measure_gc_performance); - if (use_generational_cc_) { - young_concurrent_copying_collector_ = new collector::ConcurrentCopying( - this, - /*young_gen=*/true, - use_generational_cc_, - "young", - measure_gc_performance); + if (use_generational_gc_) { + young_concurrent_copying_collector_ = + new collector::ConcurrentCopying(this, + /*young_gen=*/true, + use_generational_gc_, + "young", + measure_gc_performance); } active_concurrent_copying_collector_.store(concurrent_copying_collector_, std::memory_order_relaxed); DCHECK(region_space_ != nullptr); concurrent_copying_collector_->SetRegionSpace(region_space_); - if (use_generational_cc_) { + if (use_generational_gc_) { young_concurrent_copying_collector_->SetRegionSpace(region_space_); // At this point, non-moving space should be created. DCHECK(non_moving_space_ != nullptr); concurrent_copying_collector_->CreateInterRegionRefBitmaps(); } garbage_collectors_.push_back(concurrent_copying_collector_); - if (use_generational_cc_) { + if (use_generational_gc_) { garbage_collectors_.push_back(young_concurrent_copying_collector_); } } } - if (!GetBootImageSpaces().empty() && non_moving_space_ != nullptr && - (is_zygote || separate_non_moving_space)) { - // Check that there's no gap between the image space and the non moving space so that the - // immune region won't break (eg. due to a large object allocated in the gap). This is only - // required when we're the zygote. - // Space with smallest Begin(). - space::ImageSpace* first_space = nullptr; - for (space::ImageSpace* space : boot_image_spaces_) { - if (first_space == nullptr || space->Begin() < first_space->Begin()) { - first_space = space; - } - } - bool no_gap = MemMap::CheckNoGaps(*first_space->GetMemMap(), *non_moving_space_->GetMemMap()); - if (!no_gap) { - PrintFileToLog("/proc/self/maps", LogSeverity::ERROR); - MemMap::DumpMaps(LOG_STREAM(ERROR), /* terse= */ true); - LOG(FATAL) << "There's a gap between the image space and the non-moving space"; - } - } - // Perfetto Java Heap Profiler Support. - if (runtime->IsPerfettoJavaHeapStackProfEnabled()) { - // Perfetto Plugin is loaded and enabled, initialize the Java Heap Profiler. 
- InitPerfettoJavaHeapProf(); - } else { - // Disable the Java Heap Profiler. - GetHeapSampler().DisableHeapSampler(); - } - - instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation(); - if (gc_stress_mode_) { - backtrace_lock_ = new Mutex("GC complete lock"); - } - if (is_running_on_memory_tool_ || gc_stress_mode_) { - instrumentation->InstrumentQuickAllocEntryPoints(); - } - if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { - LOG(INFO) << "Heap() exiting"; - } } MemMap Heap::MapAnonymousPreferredAddress(const char* name, @@ -2315,7 +2322,7 @@ void Heap::ChangeCollector(CollectorType collector_type) { gc_plan_.clear(); switch (collector_type_) { case kCollectorTypeCC: { - if (use_generational_cc_) { + if (use_generational_gc_) { gc_plan_.push_back(collector::kGcTypeSticky); } gc_plan_.push_back(collector::kGcTypeFull); @@ -2327,6 +2334,9 @@ void Heap::ChangeCollector(CollectorType collector_type) { break; } case kCollectorTypeCMC: { + if (use_generational_gc_) { + gc_plan_.push_back(collector::kGcTypeSticky); + } gc_plan_.push_back(collector::kGcTypeFull); if (use_tlab_) { ChangeAllocator(kAllocatorTypeTLAB); @@ -2568,6 +2578,9 @@ void Heap::PreZygoteFork() { region_space_->GetMarkBitmap()->Clear(); } else { bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); + if (gUseUserfaultfd && use_generational_gc_) { + MarkCompactCollector()->ResetGenerationalState(); + } } } if (temp_space_ != nullptr) { @@ -2859,10 +2872,13 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, break; case kCollectorTypeCMC: collector = mark_compact_; + if (use_generational_gc_ && gc_type == collector::kGcTypeSticky) { + collector = young_mark_compact_; + } break; case kCollectorTypeCC: collector::ConcurrentCopying* active_cc_collector; - if (use_generational_cc_) { + if (use_generational_gc_) { // TODO: Other threads must do the flip checkpoint before they start poking at // active_concurrent_copying_collector_. So we should not concurrency here. active_cc_collector = (gc_type == collector::kGcTypeSticky) ? @@ -3801,13 +3817,13 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, collector::GcType non_sticky_gc_type = NonStickyGcType(); // Find what the next non sticky collector will be. collector::GarbageCollector* non_sticky_collector = FindCollectorByGcType(non_sticky_gc_type); - if (use_generational_cc_) { + if (use_generational_gc_) { if (non_sticky_collector == nullptr) { non_sticky_collector = FindCollectorByGcType(collector::kGcTypePartial); } CHECK(non_sticky_collector != nullptr); } - double sticky_gc_throughput_adjustment = GetStickyGcThroughputAdjustment(use_generational_cc_); + double sticky_gc_throughput_adjustment = GetStickyGcThroughputAdjustment(use_generational_gc_); // If the throughput of the current sticky GC >= throughput of the non sticky collector, then // do another sticky collection next. 
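To recap the behavioural change in heap.cc: with generational CMC enabled, ChangeCollector() adds a sticky entry to the GC plan, and CollectGarbageInternal() routes sticky (minor) collections to the young mark-compact collector while full collections continue to use the existing one. A condensed sketch of that selection, using simplified stand-in types rather than the real ART declarations:

// Simplified stand-ins; not the real ART types.
enum class GcType { kSticky, kFull };
struct Collector {};
struct FullCmc : Collector {};
struct YoungCmc : Collector {};

// Mirrors the branch added to Heap::CollectGarbageInternal(): full CMC is the
// default, and the young collector is chosen only for sticky collections in
// generational mode.
Collector* PickCmcCollector(bool use_generational_gc,
                            GcType gc_type,
                            FullCmc* full_cmc,
                            YoungCmc* young_cmc) {
  Collector* collector = full_cmc;
  if (use_generational_gc && gc_type == GcType::kSticky) {
    collector = young_cmc;
  }
  return collector;
}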
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 20a74a475a..28179811bd 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -237,7 +237,7 @@ class Heap { bool gc_stress_mode, bool measure_gc_performance, bool use_homogeneous_space_compaction, - bool use_generational_cc, + bool use_generational_gc, uint64_t min_interval_homogeneous_space_compaction_by_oom, bool dump_region_info_before_gc, bool dump_region_info_after_gc); @@ -570,9 +570,7 @@ class Heap { return num_bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed); } - bool GetUseGenerationalCC() const { - return use_generational_cc_; - } + bool GetUseGenerational() const { return use_generational_gc_; } // Returns the number of objects currently allocated. size_t GetObjectsAllocated() const @@ -825,9 +823,10 @@ class Heap { // Returns the active concurrent copying collector. collector::ConcurrentCopying* ConcurrentCopyingCollector() { + DCHECK(gUseReadBarrier); collector::ConcurrentCopying* active_collector = active_concurrent_copying_collector_.load(std::memory_order_relaxed); - if (use_generational_cc_) { + if (use_generational_gc_) { DCHECK((active_collector == concurrent_copying_collector_) || (active_collector == young_concurrent_copying_collector_)) << "active_concurrent_copying_collector: " << active_collector @@ -1050,6 +1049,8 @@ class Heap { double CalculateGcWeightedAllocatedBytes(uint64_t gc_last_process_cpu_time_ns, uint64_t current_process_cpu_time) const; + // Called only from the constructor. + void CreateGarbageCollectors(bool measure_gc_performance); // Create a mem map with a preferred base address. static MemMap MapAnonymousPreferredAddress(const char* name, uint8_t* request_begin, @@ -1640,10 +1641,15 @@ class Heap { std::vector<collector::GarbageCollector*> garbage_collectors_; collector::SemiSpace* semi_space_collector_; - collector::MarkCompact* mark_compact_; Atomic<collector::ConcurrentCopying*> active_concurrent_copying_collector_; - collector::ConcurrentCopying* young_concurrent_copying_collector_; - collector::ConcurrentCopying* concurrent_copying_collector_; + union { + collector::ConcurrentCopying* young_concurrent_copying_collector_; + collector::YoungMarkCompact* young_mark_compact_; + }; + union { + collector::ConcurrentCopying* concurrent_copying_collector_; + collector::MarkCompact* mark_compact_; + }; const bool is_running_on_memory_tool_; const bool use_tlab_; @@ -1688,10 +1694,11 @@ class Heap { // Whether or not we use homogeneous space compaction to avoid OOM errors. bool use_homogeneous_space_compaction_for_oom_; - // If true, enable generational collection when using the Concurrent Copying - // (CC) collector, i.e. use sticky-bit CC for minor collections and (full) CC - // for major collections. Set in Heap constructor. - const bool use_generational_cc_; + // If true, enable generational collection when using a concurrent collector + // like Concurrent Copying (CC) or Concurrent Mark Compact (CMC) collectors, + // i.e. use sticky-bit for minor collections and full heap for major collections. + // Set in Heap constructor. + const bool use_generational_gc_; // True if the currently running collection has made some thread wait. 
bool running_collection_is_blocking_ GUARDED_BY(gc_complete_lock_); diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc index bd8fdc6b46..18e91da75d 100644 --- a/runtime/gc/heap_test.cc +++ b/runtime/gc/heap_test.cc @@ -165,7 +165,7 @@ TEST_F(HeapTest, GCMetrics) { if (fg_collector_type == kCollectorTypeCC || fg_collector_type == kCollectorTypeCMC) { // Only the Concurrent Copying and Concurrent Mark-Compact collectors enable // GC metrics at the moment. - if (heap->GetUseGenerationalCC()) { + if (heap->GetUseGenerational()) { // Check that full-heap and/or young-generation GC metrics are non-null // after trigerring the collection. EXPECT_PRED2( diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index ad9c68da3e..75ca459843 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -80,6 +80,9 @@ void BumpPointerSpace::Clear() { memset(Begin(), 0, Limit() - Begin()); } CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed"; + if (GetMarkBitmap() != nullptr) { + GetMarkBitmap()->Clear(); + } // Reset the end of the space back to the beginning, we move the end forward as we allocate // objects. SetEnd(Begin()); @@ -90,6 +93,7 @@ void BumpPointerSpace::Clear() { growth_end_ = Limit(); block_sizes_.clear(); main_block_size_ = 0; + black_dense_region_size_ = 0; } } diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index e891739ec7..2aed181d71 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -215,7 +215,7 @@ bool RegionSpace::Region::GetUseGenerationalCC() { // We are retrieving the info from Heap, instead of the cached version in // RegionSpace, because accessing the Heap from a Region object is easier // than accessing the RegionSpace. - return art::Runtime::Current()->GetHeap()->GetUseGenerationalCC(); + return art::Runtime::Current()->GetHeap()->GetUseGenerational(); } inline bool RegionSpace::Region::ShouldBeEvacuated(EvacMode evac_mode) { diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc index 973adb5a53..110754405a 100644 --- a/runtime/parsed_options_test.cc +++ b/runtime/parsed_options_test.cc @@ -149,7 +149,7 @@ TEST_F(ParsedOptionsTest, ParsedOptionsGenerationalCC) { EXPECT_TRUE(map.Exists(Opt::GcOption)); XGcOption xgc = map.GetOrDefault(Opt::GcOption); - ASSERT_TRUE(xgc.generational_cc); + ASSERT_TRUE(xgc.generational_gc); } TEST_F(ParsedOptionsTest, ParsedOptionsInstructionSet) { diff --git a/runtime/runtime.cc b/runtime/runtime.cc index b1e71d3bb1..b43fa1ca5c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -221,6 +221,8 @@ struct TraceConfig { TraceClockSource clock_source; }; +extern bool ShouldUseGenerationalGC(); + namespace { #ifdef __APPLE__ @@ -1743,7 +1745,8 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption); // Generational CC collection is currently only compatible with Baker read barriers. - bool use_generational_cc = kUseBakerReadBarrier && xgc_option.generational_cc; + bool use_generational_gc = (kUseBakerReadBarrier || gUseUserfaultfd) && + xgc_option.generational_gc && ShouldUseGenerationalGC(); // Cache the apex versions. 
InitializeApexVersions(); @@ -1792,7 +1795,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { xgc_option.gcstress_, xgc_option.measure_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), - use_generational_cc, + use_generational_gc, runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs), runtime_options.Exists(Opt::DumpRegionInfoBeforeGC), runtime_options.Exists(Opt::DumpRegionInfoAfterGC)); diff --git a/runtime/runtime_globals.h b/runtime/runtime_globals.h index dc69063b97..d3963a52e1 100644 --- a/runtime/runtime_globals.h +++ b/runtime/runtime_globals.h @@ -122,24 +122,24 @@ static constexpr bool kMovingCollector = true; static constexpr bool kMarkCompactSupport = false && kMovingCollector; // True if we allow moving classes. static constexpr bool kMovingClasses = !kMarkCompactSupport; -// When using the Concurrent Copying (CC) collector, if -// `ART_USE_GENERATIONAL_CC` is true, enable generational collection by default, -// i.e. use sticky-bit CC for minor collections and (full) CC for major +// When using the Concurrent Collectors (CC or CMC), if +// `ART_USE_GENERATIONAL_GC` is true, enable generational collection by default, +// i.e. use sticky-bit CC/CMC for minor collections and (full) CC/CMC for major // collections. // This default value can be overridden with the runtime option -// `-Xgc:[no]generational_cc`. +// `-Xgc:[no]generational_gc`. // // TODO(b/67628039): Consider either: // - renaming this to a better descriptive name (e.g. -// `ART_USE_GENERATIONAL_CC_BY_DEFAULT`); or -// - removing `ART_USE_GENERATIONAL_CC` and having a fixed default value. +// `ART_USE_GENERATIONAL_GC_BY_DEFAULT`); or +// - removing `ART_USE_GENERATIONAL_GC` and having a fixed default value. // Any of these changes will require adjusting users of this preprocessor // directive and the corresponding build system environment variable (e.g. in // ART's continuous testing). -#ifdef ART_USE_GENERATIONAL_CC -static constexpr bool kEnableGenerationalCCByDefault = true; +#ifdef ART_USE_GENERATIONAL_GC +static constexpr bool kEnableGenerationalGCByDefault = true; #else -static constexpr bool kEnableGenerationalCCByDefault = false; +static constexpr bool kEnableGenerationalGCByDefault = false; #endif // If true, enable the tlab allocator by default. 
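Pulling the runtime.cc and runtime_globals.h changes together: generational collection is now enabled only when a capable collector is available (Baker-read-barrier CC or userfaultfd-based CMC), the -Xgc:[no]generational_gc option (defaulting to kEnableGenerationalGCByDefault) allows it, and ShouldUseGenerationalGC() agrees; the latter is only declared extern here and is presumably backed by the new use_generational_cmc aconfig flag. A hedged sketch of the combined gate, with that helper stubbed out:

// Sketch only; the stub below is an assumption, not ART's implementation.
bool ShouldUseGenerationalGCStub() { return true; }  // Presumably driven by the use_generational_cmc flag.

bool EffectiveGenerationalGc(bool use_baker_read_barrier,
                             bool use_userfaultfd,
                             bool xgc_generational_gc_option) {
  // A supporting collector, the runtime option, and the build/aconfig gate must
  // all agree before generational mode is turned on.
  return (use_baker_read_barrier || use_userfaultfd) &&
         xgc_generational_gc_option &&
         ShouldUseGenerationalGCStub();
}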
diff --git a/test/735-checker-condition-merging/expected-stderr.txt b/test/735-checker-condition-merging/expected-stderr.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/735-checker-condition-merging/expected-stderr.txt diff --git a/test/735-checker-condition-merging/expected-stdout.txt b/test/735-checker-condition-merging/expected-stdout.txt new file mode 100644 index 0000000000..4071f36257 --- /dev/null +++ b/test/735-checker-condition-merging/expected-stdout.txt @@ -0,0 +1,20 @@ +IfXLtzAElseB(-7): A +IfXLtzAElseB(42): B +IfXLtzAElseB_Move(-7): A +IfXLtzAElseB_Move(42): B +IfXLtzAElseB_EnvUse(-7): A +IfXLtzAElseB_EnvUse(42): B +IfXNullAElseB(null): A +IfXNullAElseB(new Object()): B +IfXNullAElseB_Move(null): A +IfXNullAElseB_Move(new Object()): B +IfXNullAElseB_EnvUse(null): A +IfXNullAElseB_EnvUse(new Object()): B +IfXNullAElseB_RefNoEnvInBlock(null, true): A +IfXNullAElseB_RefNoEnvInBlock(new Object(), true): B +IfXNullAElseB_RefNoEnvInBlock(null, false): C +IfXNullAElseB_RefNoEnvInBlock(new Object(), false): C +IfLt7_0AElseB_86LoadFromConstantTable(2.0, true): A +IfLt7_0AElseB_86LoadFromConstantTable(10.0, true): B +IfLt7_0AElseB_86LoadFromConstantTable(2.0, false): C +IfLt7_0AElseB_86LoadFromConstantTable(10.0, false): C diff --git a/test/735-checker-condition-merging/info.txt b/test/735-checker-condition-merging/info.txt new file mode 100644 index 0000000000..30790ab52f --- /dev/null +++ b/test/735-checker-condition-merging/info.txt @@ -0,0 +1 @@ +Test for merging `HCondition` into its user with the "emitted at use site" approach. diff --git a/test/735-checker-condition-merging/src/Main.java b/test/735-checker-condition-merging/src/Main.java new file mode 100644 index 0000000000..98e9c4cdfe --- /dev/null +++ b/test/735-checker-condition-merging/src/Main.java @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2025 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +public class Main { + private int intField; + + public static void main(String[] args) { + System.out.print("IfXLtzAElseB(-7): "); + $noinline$IfXLtzAElseB(-7); + System.out.print("IfXLtzAElseB(42): "); + $noinline$IfXLtzAElseB(42); + + System.out.print("IfXLtzAElseB_Move(-7): "); + new Main().$noinline$IfXLtzAElseB_Move(-7); + System.out.print("IfXLtzAElseB_Move(42): "); + new Main().$noinline$IfXLtzAElseB_Move(42); + + System.out.print("IfXLtzAElseB_EnvUse(-7): "); + $noinline$IfXLtzAElseB_EnvUse(-7); + System.out.print("IfXLtzAElseB_EnvUse(42): "); + $noinline$IfXLtzAElseB_EnvUse(42); + + System.out.print("IfXNullAElseB(null): "); + $noinline$IfXNullAElseB(null); + System.out.print("IfXNullAElseB(new Object()): "); + $noinline$IfXNullAElseB(new Object()); + + System.out.print("IfXNullAElseB_Move(null): "); + new Main().$noinline$IfXNullAElseB_Move(null); + System.out.print("IfXNullAElseB_Move(new Object()): "); + new Main().$noinline$IfXNullAElseB_Move(new Object()); + + System.out.print("IfXNullAElseB_EnvUse(null): "); + new Main().$noinline$IfXNullAElseB_EnvUse(null); + System.out.print("IfXNullAElseB_EnvUse(new Object()): "); + new Main().$noinline$IfXNullAElseB_EnvUse(new Object()); + + System.out.print("IfXNullAElseB_RefNoEnvInBlock(null, true): "); + new Main().$noinline$IfXNullAElseB_RefNoEnvInBlock(null, true); + System.out.print("IfXNullAElseB_RefNoEnvInBlock(new Object(), true): "); + new Main().$noinline$IfXNullAElseB_RefNoEnvInBlock(new Object(), true); + System.out.print("IfXNullAElseB_RefNoEnvInBlock(null, false): "); + new Main().$noinline$IfXNullAElseB_RefNoEnvInBlock(null, false); + System.out.print("IfXNullAElseB_RefNoEnvInBlock(new Object(), false): "); + new Main().$noinline$IfXNullAElseB_RefNoEnvInBlock(new Object(), false); + + System.out.print("IfLt7_0AElseB_86LoadFromConstantTable(2.0, true): "); + new Main().$noinline$IfLt7_0AElseB_86LoadFromConstantTable(2.0, true); + System.out.print("IfLt7_0AElseB_86LoadFromConstantTable(10.0, true): "); + new Main().$noinline$IfLt7_0AElseB_86LoadFromConstantTable(10.0, true); + System.out.print("IfLt7_0AElseB_86LoadFromConstantTable(2.0, false): "); + new Main().$noinline$IfLt7_0AElseB_86LoadFromConstantTable(2.0, false); + System.out.print("IfLt7_0AElseB_86LoadFromConstantTable(10.0, false): "); + new Main().$noinline$IfLt7_0AElseB_86LoadFromConstantTable(10.0, false); + + // Note: We do not test the code paths where `ConditionMoveWouldExtendReferenceLifetime()` + // in the "prepare_for_register_allocation" pass finds an instruction with environment + // between the `HCondition` and its user in this run-test. These are difficult to create + // from Java code and changes to other passes can easily invalidate such tests. Therefore + // we defer to using gtests for these cases. + } + + private static void $noinline$A() { + System.out.println("A"); + } + + private static void $noinline$B() { + System.out.println("B"); + } + + private static void $noinline$C() { + System.out.println("C"); + } + + private static boolean $inline$XLtz(int x) { + // After inlining, this shall be turned to a `HSelect` and then simplified as `HLessThan`. + return x < 0; + } + + private static boolean $inline$XNull(Object x) { + // After inlining, this shall be turned to a `HSelect` and then simplified as `HEqual`. 
+ return x == null; + } + + private static boolean $inline$XLt7_0(double x) { + return x < 7.0; + } + + private static void $noinline$ignore(int ignored) {} + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB(int) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> {{GreaterThanOrEqual|LessThan}} emitted_at_use_site:false + /// CHECK-NEXT: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB(int) prepare_for_register_allocation (after) + /// CHECK: <<Cond:z\d+>> {{GreaterThanOrEqual|LessThan}} emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + public static void $noinline$IfXLtzAElseB(int x) { + if (x < 0) { + $noinline$A(); + } else { + $noinline$B(); + } + } + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB_Move(int) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> LessThan emitted_at_use_site:false + /// CHECK-NEXT: InstanceFieldGet + // On X86, there can be also X86ComputeBaseMethodAddress here. + /// CHECK: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB_Move(int) prepare_for_register_allocation (after) + /// CHECK: InstanceFieldGet + // On X86, there can be also X86ComputeBaseMethodAddress here. + /// CHECK: <<Cond:z\d+>> LessThan emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + public void $noinline$IfXLtzAElseB_Move(int x) { + boolean cond = $inline$XLtz(x); + + int value = intField; + if (cond) { + cond = false; // Avoid environment use below. + $noinline$A(); + } else { + cond = false; // Avoid environment use below. + $noinline$B(); + } + $noinline$ignore(value); + } + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB_EnvUse(int) prepare_for_register_allocation (before) + /// CHECK: LessThan emitted_at_use_site:false + + /// CHECK-START: void Main.$noinline$IfXLtzAElseB_EnvUse(int) prepare_for_register_allocation (after) + /// CHECK-DAG: <<Cond:z\d+>> LessThan emitted_at_use_site:false + // Match an environment use. Use the fact that the <<Cond>> is in vreg 0. Otherwise we'd + // need to add a regex to match the earlier vregs which is difficult due to a regex eagerly + // consuming as much as possible but it could be curtailed by using the fact that there + // are no other boolean (`z`) values in the graph, for example with `{{([^z,]+,)*}}`. This + // would be much easier if we could put a variable inside the regex and make the entire + // env uses a single regex, `env:[[{{([^,]+,)*<<Cond>>(,[^,\]]+)*}}]]`. + /// CHECK-DAG: InvokeStaticOrDirect env:[[<<Cond>>{{(,[^,\]]+)*}}]] + + public static void $noinline$IfXLtzAElseB_EnvUse(int x) { + boolean cond = $inline$XLtz(x); + if (cond) { + $noinline$A(); + } else { + $noinline$B(); + } + } + + /// CHECK-START: void Main.$noinline$IfXNullAElseB(java.lang.Object) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> {{Equal|NotEqual}} emitted_at_use_site:false + /// CHECK-NEXT: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfXNullAElseB(java.lang.Object) prepare_for_register_allocation (after) + /// CHECK: <<Cond:z\d+>> {{Equal|NotEqual}} emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + public static void $noinline$IfXNullAElseB(Object x) { + if (x == null) { + $noinline$A(); + } else { + $noinline$B(); + } + } + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_Move(java.lang.Object) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> Equal emitted_at_use_site:false + /// CHECK-NEXT: InstanceFieldGet + // On X86, there can be also X86ComputeBaseMethodAddress here. 
+ /// CHECK: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_Move(java.lang.Object) prepare_for_register_allocation (after) + /// CHECK: InstanceFieldGet + // On X86, there can be also X86ComputeBaseMethodAddress here. + /// CHECK: <<Cond:z\d+>> Equal emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + public void $noinline$IfXNullAElseB_Move(Object x) { + boolean cond = $inline$XNull(x); + + int value = intField; + if (cond) { + cond = false; // Avoid environment use below. + $noinline$A(); + } else { + cond = false; // Avoid environment use below. + $noinline$B(); + } + $noinline$ignore(value); + } + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_EnvUse(java.lang.Object) prepare_for_register_allocation (before) + /// CHECK: Equal emitted_at_use_site:false + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_EnvUse(java.lang.Object) prepare_for_register_allocation (after) + /// CHECK: Equal emitted_at_use_site:false + + public static void $noinline$IfXNullAElseB_EnvUse(Object x) { + boolean cond = $inline$XNull(x); + if (cond) { + $noinline$A(); + } else { + $noinline$B(); + } + } + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_RefNoEnvInBlock(java.lang.Object, boolean) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> {{Equal|NotEqual}} emitted_at_use_site:false + /// CHECK: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfXNullAElseB_RefNoEnvInBlock(java.lang.Object, boolean) prepare_for_register_allocation (after) + /// CHECK: <<Cond:z\d+>> {{Equal|NotEqual}} emitted_at_use_site:false + /// CHECK: If [<<Cond>>] + + public static void $noinline$IfXNullAElseB_RefNoEnvInBlock(Object x, boolean otherCond) { + boolean cond = $inline$XNull(x); + if (otherCond) { + if (cond) { + cond = false; // Avoid environment use below. + $noinline$A(); + } else { + cond = false; // Avoid environment use below. + $noinline$B(); + } + } else { + cond = false; // Avoid environment use below. + $noinline$C(); + } + } + + /// CHECK-START: void Main.$noinline$IfLt7_0AElseB_86LoadFromConstantTable(double, boolean) prepare_for_register_allocation (before) + /// CHECK: <<Cond:z\d+>> {{LessThan|GreaterThanOrEqual}} emitted_at_use_site:false + /// CHECK: If [<<Cond>>] + + /// CHECK-START: void Main.$noinline$IfLt7_0AElseB_86LoadFromConstantTable(double, boolean) prepare_for_register_allocation (after) + /// CHECK: <<Cond:z\d+>> {{LessThan|GreaterThanOrEqual}} emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + /// CHECK-START-X86: void Main.$noinline$IfLt7_0AElseB_86LoadFromConstantTable(double, boolean) prepare_for_register_allocation (after) + /// CHECK: X86ComputeBaseMethodAddress + // Note: X86ComputeBaseMethodAddress is not moved before X86LoadFromConstantTable because + // it has additional uses in all the `$noinline$` invokes. + /// CHECK: X86LoadFromConstantTable + /// CHECK-NEXT: <<Cond:z\d+>> {{LessThan|GreaterThanOrEqual}} emitted_at_use_site:true + /// CHECK-NEXT: If [<<Cond>>] + + /// CHECK-START-X86: void Main.$noinline$IfLt7_0AElseB_86LoadFromConstantTable(double, boolean) prepare_for_register_allocation (after) + /// CHECK-DAG: <<MA:i\d+>> X86ComputeBaseMethodAddress + /// CHECK-DAG: InvokeStaticOrDirect [<<MA>>] + + public static void $noinline$IfLt7_0AElseB_86LoadFromConstantTable( + double x, boolean otherCond) { + boolean cond = $inline$XLt7_0(x); + if (otherCond) { + if (cond) { + cond = false; // Avoid environment use below. + $noinline$A(); + } else { + cond = false; // Avoid environment use below. 
+ $noinline$B(); + } + } else { + cond = false; // Avoid environment use below. + $noinline$C(); + } + } +}
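A closing note on the new checker test: "emitted at use site" means the prepare_for_register_allocation pass lets the code generator fuse an HCondition with its HIf user instead of materializing a boolean, and the expectations above show when that is blocked (extra environment uses of the condition, or a reference-typed condition whose move could extend a reference's lifetime past an environment-carrying instruction). The following is a rough, hedged approximation of that decision using stand-in data, not the actual pass logic:

// Stand-in data only; a rough approximation of the decision the checker
// expectations exercise, not ART's prepare_for_register_allocation code.
struct CondInfo {
  int non_environment_uses = 1;             // Ideally just the HIf user.
  int environment_uses = 0;                 // Extra env uses keep the boolean materialized.
  bool reference_lifetime_concern = false;  // Outcome of the (elided) analysis that the
                                            // ConditionMoveWouldExtendReferenceLifetime()
                                            // comment in Main.java refers to.
};

bool CanEmitAtUseSiteSketch(const CondInfo& cond) {
  return cond.non_environment_uses == 1 &&
         cond.environment_uses == 0 &&
         !cond.reference_lifetime_concern;
}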