38 files changed, 798 insertions(+), 285 deletions(-)
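The bulk of this change reserves two read barrier (rb) bits in the object lock word and preserves them across every thin-lock fast path, in both the C++ runtime and the per-architecture assembly stubs. As a hedged orientation sketch (not part of the diff itself), the new constants fit together roughly as follows; the names are assumptions that mirror the LOCK_WORD_* macros added to runtime/asm_support.h further down.

#include <cstdint>

// Illustrative only: bits 31-30 hold the lock state, bits 29-28 the read
// barrier state, and (in the thin/unlocked state) bits 27-16 the recursion
// count and bits 15-0 the owner thread id.
constexpr uint32_t kStateMaskShifted = 0xC0000000u;             // LOCK_WORD_STATE_MASK
constexpr uint32_t kReadBarrierStateMaskShifted = 0x30000000u;  // LOCK_WORD_READ_BARRIER_STATE_MASK
constexpr uint32_t kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted;
constexpr uint32_t kThinLockCountOne = 1u << 16;                // LOCK_WORD_THIN_LOCK_COUNT_ONE

static_assert(kReadBarrierStateMaskShiftedToggled == 0xCFFFFFFFu,
              "the toggled rb mask clears only bits 29-28");
static_assert((kStateMaskShifted & kReadBarrierStateMaskShifted) == 0u,
              "state bits and rb bits do not overlap");
static_assert(kThinLockCountOne == 65536u, "one recursion count == 0x10000");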
diff --git a/.gitignore b/.gitignore index 3d1658d84a..c4cf98b37c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1 @@ -USE_LLVM_COMPILER -USE_PORTABLE_COMPILER -SMALL_ART -SEA_IR_ART JIT_ART diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index 6a83e72e8c..08b4ec2fbc 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -60,18 +60,6 @@ ART_JIT := true endif # -# Used to enable smart mode -# -ART_SMALL_MODE := false -ifneq ($(wildcard art/SMALL_ART),) -$(info Enabling ART_SMALL_MODE because of existence of art/SMALL_ART) -ART_SMALL_MODE := true -endif -ifeq ($(WITH_ART_SMALL_MODE), true) -ART_SMALL_MODE := true -endif - -# # Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS. # ART_DEFAULT_GC_TYPE ?= CMS @@ -219,10 +207,6 @@ else art_cflags += -DIMT_SIZE=64 endif -ifeq ($(ART_SMALL_MODE),true) - art_cflags += -DART_SMALL_MODE=1 -endif - ifeq ($(ART_USE_OPTIMIZING_COMPILER),true) art_cflags += -DART_USE_OPTIMIZING_COMPILER=1 endif diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 9cf005bc48..1a9dbeae0f 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -161,7 +161,11 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), mirror::Object::MonitorOffset().Int32Value() >> 2); MarkPossibleNullPointerException(opt_flags); - LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL); + // Zero out the read barrier bits. + OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r3, 0, NULL); + // r1 is zero except for the rb bits here. Copy the read barrier bits into r2. + OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1); NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), mirror::Object::MonitorOffset().Int32Value() >> 2); LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL); @@ -189,7 +193,14 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), mirror::Object::MonitorOffset().Int32Value() >> 2); MarkPossibleNullPointerException(opt_flags); - OpRegImm(kOpCmp, rs_r1, 0); + // Zero out the read barrier bits. + OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled); + // r1 will be zero except for the rb bits if the following + // cmp-and-branch branches to eq where r2 will be used. Copy the + // read barrier bits into r2. + OpRegRegReg(kOpOr, rs_r2, rs_r2, rs_r1); + OpRegImm(kOpCmp, rs_r3, 0); + LIR* it = OpIT(kCondEq, ""); NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), mirror::Object::MonitorOffset().Int32Value() >> 2); @@ -228,14 +239,28 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); } } - Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + if (!kUseReadBarrier) { + Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock + } else { + NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + } MarkPossibleNullPointerException(opt_flags); - LoadConstantNoClobber(rs_r3, 0); - LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL); + // Zero out the read barrier bits. 
+ OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled); + // Zero out except the read barrier bits. + OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r3, rs_r2, NULL); GenMemBarrier(kAnyStore); - Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); - LIR* unlock_success_branch = OpUnconditionalBranch(NULL); - + LIR* unlock_success_branch; + if (!kUseReadBarrier) { + Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + unlock_success_branch = OpUnconditionalBranch(NULL); + } else { + NewLIR4(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + unlock_success_branch = OpCmpImmBranch(kCondEq, rs_r2, 0, NULL); + } LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); slow_unlock_branch->target = slow_path_target; if (null_check_branch != nullptr) { @@ -253,25 +278,57 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { } else { // Explicit null-check as slow-path is entered using an IT. GenNullCheck(rs_r0, opt_flags); - Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock + if (!kUseReadBarrier) { + Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock + } else { + // If we use read barriers, we need to use atomic instructions. + NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + } MarkPossibleNullPointerException(opt_flags); Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); - LoadConstantNoClobber(rs_r3, 0); + // Zero out the read barrier bits. + OpRegRegImm(kOpAnd, rs_r3, rs_r1, LockWord::kReadBarrierStateMaskShiftedToggled); + // Zero out except the read barrier bits. + OpRegRegImm(kOpAnd, rs_r1, rs_r1, LockWord::kReadBarrierStateMaskShifted); // Is lock unheld on lock or held by us (==thread_id) on unlock? - OpRegReg(kOpCmp, rs_r1, rs_r2); - - LIR* it = OpIT(kCondEq, "EE"); - if (GenMemBarrier(kAnyStore)) { - UpdateIT(it, "TEE"); + OpRegReg(kOpCmp, rs_r3, rs_r2); + if (!kUseReadBarrier) { + LIR* it = OpIT(kCondEq, "EE"); + if (GenMemBarrier(kAnyStore)) { + UpdateIT(it, "TEE"); + } + Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), + rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); + OpEndIT(it); + MarkSafepointPC(call_inst); + } else { + // If we use read barriers, we need to use atomic instructions. + LIR* it = OpIT(kCondEq, ""); + if (GenMemBarrier(kAnyStore)) { + UpdateIT(it, "T"); + } + NewLIR4/*eq*/(kThumb2Strex, rs_r2.GetReg(), rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + OpEndIT(it); + // Since we know r2 wasn't zero before the above it instruction, + // if r2 is zero here, we know r3 was equal to r2 and the strex + // suceeded (we're done). Otherwise (either r3 wasn't equal to r2 + // or the strex failed), call the entrypoint. 
+ OpRegImm(kOpCmp, rs_r2, 0); + LIR* it2 = OpIT(kCondNe, "T"); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), + rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); + OpEndIT(it2); + MarkSafepointPC(call_inst); } - Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); - // Go expensive route - UnlockObjectFromCode(obj); - LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), - rs_rARM_LR); - ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); - OpEndIT(it); - MarkSafepointPC(call_inst); } } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 24e8fdff80..15edcc5142 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -172,7 +172,12 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value()); NewLIR2(kA64Ldxr2rX, rw3, rx2); MarkPossibleNullPointerException(opt_flags); - LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL); + // Zero out the read barrier bits. + OpRegRegImm(kOpAnd, rs_w2, rs_w3, LockWord::kReadBarrierStateMaskShiftedToggled); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w2, 0, NULL); + // w3 is zero except for the rb bits here. Copy the read barrier bits into w1. + OpRegRegReg(kOpOr, rs_w1, rs_w1, rs_w3); + OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value()); NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2); LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL); @@ -217,13 +222,28 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { } } Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1); - Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2); + if (!kUseReadBarrier) { + Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2); + } else { + OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value()); + NewLIR2(kA64Ldxr2rX, rw2, rx3); + } MarkPossibleNullPointerException(opt_flags); - LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL); + // Zero out the read barrier bits. + OpRegRegImm(kOpAnd, rs_w3, rs_w2, LockWord::kReadBarrierStateMaskShiftedToggled); + // Zero out except the read barrier bits. 
+ OpRegRegImm(kOpAnd, rs_w2, rs_w2, LockWord::kReadBarrierStateMaskShifted); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w3, rs_w1, NULL); GenMemBarrier(kAnyStore); - Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr); - LIR* unlock_success_branch = OpUnconditionalBranch(NULL); - + LIR* unlock_success_branch; + if (!kUseReadBarrier) { + Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2); + unlock_success_branch = OpUnconditionalBranch(NULL); + } else { + OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value()); + NewLIR3(kA64Stxr3wrX, rw1, rw2, rx3); + unlock_success_branch = OpCmpImmBranch(kCondEq, rs_w1, 0, NULL); + } LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); slow_unlock_branch->target = slow_path_target; if (null_check_branch != nullptr) { diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 122ae4b575..0683d185e6 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -41,11 +41,7 @@ class CompilerOptions FINAL { }; // Guide heuristics to determine whether to compile method if profile data not available. -#if ART_SMALL_MODE - static const CompilerFilter kDefaultCompilerFilter = kInterpretOnly; -#else static const CompilerFilter kDefaultCompilerFilter = kSpeed; -#endif static const size_t kDefaultHugeMethodThreshold = 10000; static const size_t kDefaultLargeMethodThreshold = 600; static const size_t kDefaultSmallMethodThreshold = 60; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index f5f9320532..b4732c87c8 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -909,7 +909,9 @@ void ImageWriter::CopyAndFixupObjects() { heap->VisitObjects(CopyAndFixupObjectsCallback, this); // Fix up the object previously had hash codes. for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) { - hash_pair.first->SetLockWord(LockWord::FromHashCode(hash_pair.second), false); + Object* obj = hash_pair.first; + DCHECK_EQ(obj->GetLockWord(false).ReadBarrierState(), 0U); + obj->SetLockWord(LockWord::FromHashCode(hash_pair.second, 0U), false); } saved_hashes_.clear(); } @@ -935,7 +937,7 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { Object* copy = reinterpret_cast<Object*>(dst); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor // word. 
- copy->SetLockWord(LockWord(), false); + copy->SetLockWord(LockWord::Default(), false); image_writer->FixupObject(obj, copy); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index deaeb8ef47..4ca364867d 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -261,8 +261,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { virtual ~ValueRange() {} - virtual const MonotonicValueRange* AsMonotonicValueRange() const { return nullptr; } - bool IsMonotonicValueRange() const { + virtual MonotonicValueRange* AsMonotonicValueRange() { return nullptr; } + bool IsMonotonicValueRange() { return AsMonotonicValueRange() != nullptr; } @@ -345,7 +345,11 @@ class MonotonicValueRange : public ValueRange { virtual ~MonotonicValueRange() {} - const MonotonicValueRange* AsMonotonicValueRange() const OVERRIDE { return this; } + int32_t GetIncrement() const { return increment_; } + + ValueBound GetBound() const { return bound_; } + + MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { @@ -494,6 +498,73 @@ class BCEVisitor : public HGraphVisitor { } } + // Special case that we may simultaneously narrow two MonotonicValueRange's to + // regular value ranges. + void HandleIfBetweenTwoMonotonicValueRanges(HIf* instruction, + HInstruction* left, + HInstruction* right, + IfCondition cond, + MonotonicValueRange* left_range, + MonotonicValueRange* right_range) { + DCHECK(left->IsLoopHeaderPhi()); + DCHECK(right->IsLoopHeaderPhi()); + if (instruction->GetBlock() != left->GetBlock()) { + // Comparison needs to be in loop header to make sure it's done after each + // increment/decrement. + return; + } + + // Handle common cases which also don't have overflow/underflow concerns. + if (left_range->GetIncrement() == 1 && + left_range->GetBound().IsConstant() && + right_range->GetIncrement() == -1 && + right_range->GetBound().IsRelatedToArrayLength() && + right_range->GetBound().GetConstant() < 0) { + HBasicBlock* successor = nullptr; + int32_t left_compensation = 0; + int32_t right_compensation = 0; + if (cond == kCondLT) { + left_compensation = -1; + right_compensation = 1; + successor = instruction->IfTrueSuccessor(); + } else if (cond == kCondLE) { + successor = instruction->IfTrueSuccessor(); + } else if (cond == kCondGT) { + successor = instruction->IfFalseSuccessor(); + } else if (cond == kCondGE) { + left_compensation = -1; + right_compensation = 1; + successor = instruction->IfFalseSuccessor(); + } else { + // We don't handle '=='/'!=' test in case left and right can cross and + // miss each other. 
+ return; + } + + if (successor != nullptr) { + bool overflow; + bool underflow; + ValueRange* new_left_range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + left_range->GetBound(), + right_range->GetBound().Add(left_compensation, &overflow, &underflow)); + if (!overflow && !underflow) { + ApplyRangeFromComparison(left, instruction->GetBlock(), successor, + new_left_range); + } + + ValueRange* new_right_range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + left_range->GetBound().Add(right_compensation, &overflow, &underflow), + right_range->GetBound()); + if (!overflow && !underflow) { + ApplyRangeFromComparison(right, instruction->GetBlock(), successor, + new_right_range); + } + } + } + } + // Handle "if (left cmp_cond right)". void HandleIf(HIf* instruction, HInstruction* left, HInstruction* right, IfCondition cond) { HBasicBlock* block = instruction->GetBlock(); @@ -515,10 +586,19 @@ class BCEVisitor : public HGraphVisitor { if (!found) { // No constant or array.length+c format bound found. // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. - ValueRange* range = LookupValueRange(right, block); - if (range != nullptr) { - lower = range->GetLower(); - upper = range->GetUpper(); + ValueRange* right_range = LookupValueRange(right, block); + if (right_range != nullptr) { + if (right_range->IsMonotonicValueRange()) { + ValueRange* left_range = LookupValueRange(left, block); + if (left_range != nullptr && left_range->IsMonotonicValueRange()) { + HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond, + left_range->AsMonotonicValueRange(), + right_range->AsMonotonicValueRange()); + return; + } + } + lower = right_range->GetLower(); + upper = right_range->GetUpper(); } else { lower = ValueBound::Min(); upper = ValueBound::Max(); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cda5c1a99c..07cc41a8d5 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -988,7 +988,7 @@ void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); - int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + int32_t value = CodeGenerator::GetInt32ValueOf(locations->InAt(1).GetConstant()); ShifterOperand operand; if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { __ cmp(left, operand); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 116dd158d1..3c8f62c789 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -922,7 +922,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { - int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); if (constant == 0) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index adc022a2ce..6365bca319 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -894,7 +894,7 @@ void 
InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); if (constant == 0) { __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 007324eb68..9447d3b816 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -118,8 +118,14 @@ static inline vixl::CPURegister InputCPURegisterAt(HInstruction* instr, int inde static inline int64_t Int64ConstantFrom(Location location) { HConstant* instr = location.GetConstant(); - return instr->IsIntConstant() ? instr->AsIntConstant()->GetValue() - : instr->AsLongConstant()->GetValue(); + if (instr->IsIntConstant()) { + return instr->AsIntConstant()->GetValue(); + } else if (instr->IsNullConstant()) { + return 0; + } else { + DCHECK(instr->IsLongConstant()); + return instr->AsLongConstant()->GetValue(); + } } static inline vixl::Operand OperandFrom(Location location, Primitive::Type type) { diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index cb448c883f..ea65dc0780 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -299,8 +299,17 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); if (current->CanBeMoved()) { + if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { + // For commutative ops, (x op y) will be treated the same as (y op x) + // after fixed ordering. + current->AsBinaryOperation()->OrderInputs(); + } HInstruction* existing = set->Lookup(current); if (existing != nullptr) { + // This replacement doesn't make more OrderInputs() necessary since + // current is either used by an instruction that it dominates, + // which hasn't been visited yet due to the order we visit instructions. + // Or current is used by a phi, and we don't do OrderInputs() on a phi anyway. current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7e075644ef..98076a05f2 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1500,7 +1500,39 @@ class HBinaryOperation : public HExpression<2> { HInstruction* GetRight() const { return InputAt(1); } Primitive::Type GetResultType() const { return GetType(); } - virtual bool IsCommutative() { return false; } + virtual bool IsCommutative() const { return false; } + + // Put constant on the right. + // Returns whether order is changed. + bool OrderInputsWithConstantOnTheRight() { + HInstruction* left = InputAt(0); + HInstruction* right = InputAt(1); + if (left->IsConstant() && !right->IsConstant()) { + ReplaceInput(right, 0); + ReplaceInput(left, 1); + return true; + } + return false; + } + + // Order inputs by instruction id, but favor constant on the right side. + // This helps GVN for commutative ops. 
+ void OrderInputs() { + DCHECK(IsCommutative()); + HInstruction* left = InputAt(0); + HInstruction* right = InputAt(1); + if (left == right || (!left->IsConstant() && right->IsConstant())) { + return; + } + if (OrderInputsWithConstantOnTheRight()) { + return; + } + // Order according to instruction id. + if (left->GetId() > right->GetId()) { + ReplaceInput(right, 0); + ReplaceInput(left, 1); + } + } virtual bool CanBeMoved() const { return true; } virtual bool InstructionDataEquals(HInstruction* other) const { @@ -1529,8 +1561,6 @@ class HCondition : public HBinaryOperation { : HBinaryOperation(Primitive::kPrimBoolean, first, second), needs_materialization_(true) {} - virtual bool IsCommutative() { return true; } - bool NeedsMaterialization() const { return needs_materialization_; } void ClearNeedsMaterialization() { needs_materialization_ = false; } @@ -1556,6 +1586,8 @@ class HEqual : public HCondition { HEqual(HInstruction* first, HInstruction* second) : HCondition(first, second) {} + bool IsCommutative() const OVERRIDE { return true; } + virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x == y ? 1 : 0; } @@ -1578,6 +1610,8 @@ class HNotEqual : public HCondition { HNotEqual(HInstruction* first, HInstruction* second) : HCondition(first, second) {} + bool IsCommutative() const OVERRIDE { return true; } + virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x != y ? 1 : 0; } @@ -2136,7 +2170,7 @@ class HAdd : public HBinaryOperation { HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - virtual bool IsCommutative() { return true; } + virtual bool IsCommutative() const OVERRIDE { return true; } virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x + y; @@ -2174,7 +2208,7 @@ class HMul : public HBinaryOperation { HMul(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - virtual bool IsCommutative() { return true; } + virtual bool IsCommutative() const OVERRIDE { return true; } virtual int32_t Evaluate(int32_t x, int32_t y) const { return x * y; } virtual int64_t Evaluate(int64_t x, int64_t y) const { return x * y; } @@ -2323,7 +2357,7 @@ class HAnd : public HBinaryOperation { HAnd(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - bool IsCommutative() OVERRIDE { return true; } + bool IsCommutative() const OVERRIDE { return true; } int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x & y; } int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x & y; } @@ -2339,7 +2373,7 @@ class HOr : public HBinaryOperation { HOr(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - bool IsCommutative() OVERRIDE { return true; } + bool IsCommutative() const OVERRIDE { return true; } int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x | y; } int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x | y; } @@ -2355,7 +2389,7 @@ class HXor : public HBinaryOperation { HXor(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - bool IsCommutative() OVERRIDE { return true; } + bool IsCommutative() const OVERRIDE { return true; } int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x ^ y; } int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x ^ y; } diff --git 
a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 22665ea6ee..df7bb57f24 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -180,11 +180,7 @@ NO_RETURN static void Usage(const char* fmt, ...) { "|time):"); UsageError(" select compiler filter."); UsageError(" Example: --compiler-filter=everything"); -#if ART_SMALL_MODE - UsageError(" Default: interpret-only"); -#else UsageError(" Default: speed"); -#endif UsageError(""); UsageError(" --huge-method-max=<method-instruction-count>: the threshold size for a huge"); UsageError(" method for compiler filter tuning."); @@ -875,15 +871,8 @@ class Dex2Oat FINAL { // For Mips64, can only compile in interpreter mode. // TODO: fix compiler for Mips64. compiler_filter_string = "interpret-only"; - } else if (image_) { - compiler_filter_string = "speed"; } else { - // TODO: Migrate SMALL mode to command line option. - #if ART_SMALL_MODE - compiler_filter_string = "interpret-only"; - #else compiler_filter_string = "speed"; - #endif } } CHECK(compiler_filter_string != nullptr); diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 539b6078a5..9bd8ba7921 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -469,26 +469,33 @@ ENTRY art_quick_lock_object .Lretry_lock: ldr r2, [r9, #THREAD_ID_OFFSET] ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz r1, .Lnot_unlocked @ already thin locked - @ unlocked case - r2 holds thread id with count of 0 + mov r3, r1 + and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + cbnz r3, .Lnot_unlocked @ already thin locked + @ unlocked case - r1: original lock word that's zero except for the read barrier bits. + orr r2, r1, r2 @ r2 holds thread id with count of 0 with preserved read barrier bits strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz r3, .Lstrex_fail @ store failed, retry + cbnz r3, .Llock_strex_fail @ store failed, retry dmb ish @ full (LoadLoad|LoadStore) memory barrier bx lr -.Lstrex_fail: - b .Lretry_lock @ unlikely forward branch, need to reload and recheck r1/r2 -.Lnot_unlocked: - lsr r3, r1, 30 +.Lnot_unlocked: @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits + lsr r3, r1, LOCK_WORD_STATE_SHIFT cbnz r3, .Lslow_lock @ if either of the top two bits are set, go slow path eor r2, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId() uxth r2, r2 @ zero top 16 bits cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock @ else contention, go to slow path - add r2, r1, #65536 @ increment count in lock word placing in r2 for storing - lsr r1, r2, 30 @ if either of the top two bits are set, we overflowed. - cbnz r1, .Lslow_lock @ if we overflow the count go slow path - str r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ no need for strex as we hold the lock + mov r3, r1 @ copy the lock word to check count overflow. + and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits. + add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word placing in r2 to check overflow + lsr r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT @ if either of the upper two bits (28-29) are set, we overflowed. 
+ cbnz r3, .Lslow_lock @ if we overflow the count go slow path + add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real + strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits + cbnz r3, .Llock_strex_fail @ strex failed, retry bx lr +.Llock_strex_fail: + b .Lretry_lock @ retry .Lslow_lock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2 @ save callee saves in case we block mov r1, r9 @ pass Thread::Current @@ -505,23 +512,46 @@ END art_quick_lock_object .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object cbz r0, .Lslow_unlock +.Lretry_unlock: +#ifndef USE_READ_BARRIER ldr r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - lsr r2, r1, 30 +#else + ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ Need to use atomic instructions for read barrier +#endif + lsr r2, r1, #LOCK_WORD_STATE_SHIFT cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path ldr r2, [r9, #THREAD_ID_OFFSET] - eor r3, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId() + mov r3, r1 @ copy lock word to check thread id equality + and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId() uxth r3, r3 @ zero top 16 bits cbnz r3, .Lslow_unlock @ do lock word and self thread id's match? - cmp r1, #65536 + mov r3, r1 @ copy lock word to detect transition to unlocked + and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE bpl .Lrecursive_thin_unlock - @ transition to unlocked, r3 holds 0 + @ transition to unlocked + mov r3, r1 + and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK @ r3: zero except for the preserved read barrier bits dmb ish @ full (LoadStore|StoreStore) memory barrier +#ifndef USE_READ_BARRIER str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] +#else + strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits + cbnz r2, .Lunlock_strex_fail @ store failed, retry +#endif bx lr -.Lrecursive_thin_unlock: - sub r1, r1, #65536 +.Lrecursive_thin_unlock: @ r1: original lock word + sub r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ decrement count +#ifndef USE_READ_BARRIER str r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] +#else + strex r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits + cbnz r2, .Lunlock_strex_fail @ store failed, retry +#endif bx lr +.Lunlock_strex_fail: + b .Lretry_unlock @ retry .Lslow_unlock: @ save callee saves in case exception allocation triggers GC SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index ec25a3391e..ff57603fe4 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1000,25 +1000,33 @@ ENTRY art_quick_lock_object .Lretry_lock: ldr w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop? ldxr w1, [x4] - cbnz w1, .Lnot_unlocked // already thin locked + mov x3, x1 + and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + cbnz w3, .Lnot_unlocked // already thin locked + // unlocked case - x1: original lock word that's zero except for the read barrier bits. 
+ orr x2, x1, x2 // x2 holds thread id with count of 0 with preserved read barrier bits stxr w3, w2, [x4] - cbnz w3, .Lstrex_fail // store failed, retry + cbnz w3, .Llock_stxr_fail // store failed, retry dmb ishld // full (LoadLoad|LoadStore) memory barrier ret -.Lstrex_fail: - b .Lretry_lock // unlikely forward branch, need to reload and recheck r1/r2 -.Lnot_unlocked: - lsr w3, w1, 30 +.Lnot_unlocked: // x1: original lock word + lsr w3, w1, LOCK_WORD_STATE_SHIFT cbnz w3, .Lslow_lock // if either of the top two bits are set, go slow path eor w2, w1, w2 // lock_word.ThreadId() ^ self->ThreadId() uxth w2, w2 // zero top 16 bits cbnz w2, .Lslow_lock // lock word and self thread id's match -> recursive lock // else contention, go to slow path - add w2, w1, #65536 // increment count in lock word placing in w2 for storing - lsr w1, w2, 30 // if either of the top two bits are set, we overflowed. - cbnz w1, .Lslow_lock // if we overflow the count go slow path - str w2, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] // no need for stxr as we hold the lock + mov x3, x1 // copy the lock word to check count overflow. + and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits. + add w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count in lock word placing in w2 to check overflow + lsr w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT // if either of the upper two bits (28-29) are set, we overflowed. + cbnz w3, .Lslow_lock // if we overflow the count go slow path + add w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count for real + stxr w3, w2, [x4] + cbnz w3, .Llock_stxr_fail // store failed, retry ret +.Llock_stxr_fail: + b .Lretry_lock // retry .Lslow_lock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case we block mov x1, xSELF // pass Thread::Current @@ -1036,23 +1044,47 @@ END art_quick_lock_object .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object cbz x0, .Lslow_unlock - ldr w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - lsr w2, w1, 30 + add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET // exclusive load/store has no immediate anymore +.Lretry_unlock: +#ifndef USE_READ_BARRIER + ldr w1, [x4] +#else + ldxr w1, [x4] // Need to use atomic instructions for read barrier +#endif + lsr w2, w1, LOCK_WORD_STATE_SHIFT cbnz w2, .Lslow_unlock // if either of the top two bits are set, go slow path ldr w2, [xSELF, #THREAD_ID_OFFSET] - eor w3, w1, w2 // lock_word.ThreadId() ^ self->ThreadId() + mov x3, x1 // copy lock word to check thread id equality + and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + eor w3, w3, w2 // lock_word.ThreadId() ^ self->ThreadId() uxth w3, w3 // zero top 16 bits cbnz w3, .Lslow_unlock // do lock word and self thread id's match? 
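(The arm64 unlock stub continues below with the count check and the transition to unlocked.) For orientation, here is a hedged, standalone C++ sketch of the thin-lock unlock fast path that the ARM, arm64, x86 and x86-64 stubs in this diff all implement. The constants and the kUseReadBarrier toggle are assumptions chosen to mirror the LOCK_WORD_* values; every error case simply falls back to the runtime slow path.

#include <atomic>
#include <cstdint>

constexpr uint32_t kStateShift = 30;
constexpr uint32_t kRbMask = 0x30000000u;        // read barrier bits
constexpr uint32_t kRbMaskToggled = 0xCFFFFFFFu; // everything but the rb bits
constexpr uint32_t kCountOne = 1u << 16;
constexpr bool kUseReadBarrier = true;           // assumption; ART keys this off USE_READ_BARRIER

// Returns true on fast-path success, false if the runtime slow path is needed.
bool ThinUnlockFastPath(std::atomic<uint32_t>* lock_word, uint32_t self_thread_id) {
  while (true) {
    uint32_t old_word = lock_word->load(std::memory_order_relaxed);
    uint32_t no_rb = old_word & kRbMaskToggled;             // ignore the rb bits
    if ((no_rb >> kStateShift) != 0u) return false;         // fat lock or hash code: slow path
    if ((no_rb & 0xFFFFu) != self_thread_id) return false;  // not the owner: slow path
    uint32_t new_word = (no_rb >= kCountOne)
        ? old_word - kCountOne   // recursive unlock: drop one count, rb bits untouched
        : old_word & kRbMask;    // last unlock: keep only the rb bits
    if (!kUseReadBarrier) {
      // Without read barriers a plain release store suffices; nothing else
      // writes the lock word while we own the thin lock.
      lock_word->store(new_word, std::memory_order_release);
      return true;
    }
    // With read barriers the rb bits can change concurrently, so a CAS is
    // needed (the C++ analogue of the ldrex/strex and lock cmpxchg sequences).
    if (lock_word->compare_exchange_weak(old_word, new_word,
                                         std::memory_order_release,
                                         std::memory_order_relaxed)) {
      return true;
    }
    // CAS failed (rb bits flipped or spurious failure): retry, like .Lretry_unlock.
  }
}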
- cmp w1, #65536 + mov x3, x1 // copy lock word to detect transition to unlocked + and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + cmp w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE bpl .Lrecursive_thin_unlock - // transition to unlocked, w3 holds 0 + // transition to unlocked + mov x3, x1 + and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK // w3: zero except for the preserved read barrier bits dmb ish // full (LoadStore|StoreStore) memory barrier - str w3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] +#ifndef USE_READ_BARRIER + str w3, [x4] +#else + stxr w2, w3, [x4] // Need to use atomic instructions for read barrier + cbnz w2, .Lunlock_stxr_fail // store failed, retry +#endif ret -.Lrecursive_thin_unlock: - sub w1, w1, #65536 - str w1, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] +.Lrecursive_thin_unlock: // w1: original lock word + sub w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count +#ifndef USE_READ_BARRIER + str w1, [x4] +#else + stxr w2, w1, [x4] // Need to use atomic instructions for read barrier + cbnz w2, .Lunlock_stxr_fail // store failed, retry +#endif ret +.Lunlock_stxr_fail: + b .Lretry_unlock // retry .Lslow_unlock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case exception allocation triggers GC mov x1, xSELF // pass Thread::Current diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 47bc5eaa7b..e59c881c73 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -922,29 +922,39 @@ DEFINE_FUNCTION art_quick_lock_object jz .Lslow_lock .Lretry_lock: movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word - test LITERAL(0xC0000000), %ecx // test the 2 high bits. + test LITERAL(LOCK_WORD_STATE_MASK), %ecx // test the 2 high bits. jne .Lslow_lock // slow path if either of the two high bits are set. - movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id + movl %ecx, %edx // save lock word (edx) to keep read barrier bits. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. test %ecx, %ecx jnz .Lalready_thin // lock word contains a thin lock - // unlocked case - %edx holds thread id with count of 0 + // unlocked case - edx: original lock word, eax: obj. movl %eax, %ecx // remember object in case of retry - xor %eax, %eax // eax == 0 for comparison with lock word in cmpxchg - lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) - jnz .Lcmpxchg_fail // cmpxchg failed retry + movl %edx, %eax // eax: lock word zero except for read barrier bits. + movl %fs:THREAD_ID_OFFSET, %edx // load thread id. + or %eax, %edx // edx: thread id with count of 0 + read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. + jnz .Llock_cmpxchg_fail // cmpxchg failed retry ret -.Lcmpxchg_fail: - movl %ecx, %eax // restore eax - jmp .Lretry_lock -.Lalready_thin: +.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), eax: obj. + movl %fs:THREAD_ID_OFFSET, %ecx // ecx := thread id cmpw %cx, %dx // do we hold the lock already? jne .Lslow_lock - addl LITERAL(65536), %ecx // increment recursion count - test LITERAL(0xC0000000), %ecx // overflowed if either of top two bits are set + movl %edx, %ecx // copy the lock word to check count overflow. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check. 
+ test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set. jne .Lslow_lock // count overflowed so go slow - // update lockword, cmpxchg not necessary as we hold lock - movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) + movl %eax, %ecx // save obj to use eax for cmpxchg. + movl %edx, %eax // copy the lock word as the old val for cmpxchg. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. + // update lockword, cmpxchg necessary for read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. + jnz .Llock_cmpxchg_fail // cmpxchg failed retry ret +.Llock_cmpxchg_fail: + movl %ecx, %eax // restore eax + jmp .Lretry_lock .Lslow_lock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up @@ -963,20 +973,43 @@ END_FUNCTION art_quick_lock_object DEFINE_FUNCTION art_quick_unlock_object testl %eax, %eax // null check object/eax jz .Lslow_unlock +.Lretry_unlock: movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id - test LITERAL(0xC0000000), %ecx + test LITERAL(LOCK_WORD_STATE_MASK), %ecx jnz .Lslow_unlock // lock word contains a monitor cmpw %cx, %dx // does the thread id match? jne .Lslow_unlock - cmpl LITERAL(65536), %ecx + movl %ecx, %edx // copy the lock word to detect new count of 0. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits. + cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx jae .Lrecursive_thin_unlock - movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) + // update lockword, cmpxchg necessary for read barrier bits. + movl %eax, %edx // edx: obj + movl %ecx, %eax // eax: old lock word. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits. +#ifndef USE_READ_BARRIER + movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) +#else + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. + jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry +#endif ret -.Lrecursive_thin_unlock: - subl LITERAL(65536), %ecx - mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) +.Lrecursive_thin_unlock: // ecx: original lock word, eax: obj + // update lockword, cmpxchg necessary for read barrier bits. + movl %eax, %edx // edx: obj + movl %ecx, %eax // eax: old lock word. + subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // ecx: new lock word with decremented count. +#ifndef USE_READ_BARRIER + mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) +#else + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. + jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry +#endif ret +.Lunlock_cmpxchg_fail: // edx: obj + movl %edx, %eax // restore eax + jmp .Lretry_unlock .Lslow_unlock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 406126bc96..65c65e2b72 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -955,24 +955,33 @@ DEFINE_FUNCTION art_quick_lock_object jz .Lslow_lock .Lretry_lock: movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word. - test LITERAL(0xC0000000), %ecx // Test the 2 high bits. 
+ test LITERAL(LOCK_WORD_STATE_MASK), %ecx // Test the 2 high bits. jne .Lslow_lock // Slow path if either of the two high bits are set. - movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id + movl %ecx, %edx // save lock word (edx) to keep read barrier bits. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. test %ecx, %ecx jnz .Lalready_thin // Lock word contains a thin lock. - // unlocked case - %edx holds thread id with count of 0 - xor %eax, %eax // eax == 0 for comparison with lock word in cmpxchg + // unlocked case - edx: original lock word, edi: obj. + movl %edx, %eax // eax: lock word zero except for read barrier bits. + movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id + or %eax, %edx // edx: thread id with count of 0 + read barrier bits. lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) jnz .Lretry_lock // cmpxchg failed retry ret -.Lalready_thin: +.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), edi: obj. + movl %gs:THREAD_ID_OFFSET, %ecx // ecx := thread id cmpw %cx, %dx // do we hold the lock already? jne .Lslow_lock - addl LITERAL(65536), %ecx // increment recursion count - test LITERAL(0xC0000000), %ecx // overflowed if either of top two bits are set + movl %edx, %ecx // copy the lock word to check count overflow. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count + test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set jne .Lslow_lock // count overflowed so go slow - // update lockword, cmpxchg not necessary as we hold lock - movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) + movl %edx, %eax // copy the lock word as the old val for cmpxchg. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. + // update lockword, cmpxchg necessary for read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, edx: new val. + jnz .Lretry_lock // cmpxchg failed retry ret .Lslow_lock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME @@ -985,19 +994,37 @@ END_FUNCTION art_quick_lock_object DEFINE_FUNCTION art_quick_unlock_object testl %edi, %edi // null check object/edi jz .Lslow_unlock +.Lretry_unlock: movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id - test LITERAL(0xC0000000), %ecx + test LITERAL(LOCK_WORD_STATE_MASK), %ecx jnz .Lslow_unlock // lock word contains a monitor cmpw %cx, %dx // does the thread id match? jne .Lslow_unlock - cmpl LITERAL(65536), %ecx + movl %ecx, %edx // copy the lock word to detect new count of 0. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits. + cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx jae .Lrecursive_thin_unlock - movl LITERAL(0), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) + // update lockword, cmpxchg necessary for read barrier bits. + movl %ecx, %eax // eax: old lock word. + andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits. +#ifndef USE_READ_BARRIER + movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) +#else + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val. 
+ jnz .Lretry_unlock // cmpxchg failed retry +#endif ret -.Lrecursive_thin_unlock: - subl LITERAL(65536), %ecx +.Lrecursive_thin_unlock: // ecx: original lock word, edi: obj + // update lockword, cmpxchg necessary for read barrier bits. + movl %ecx, %eax // eax: old lock word. + subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx +#ifndef USE_READ_BARRIER mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) +#else + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val. + jnz .Lretry_unlock // cmpxchg failed retry +#endif ret .Lslow_unlock: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME diff --git a/runtime/asm_support.h b/runtime/asm_support.h index a35e05b87b..ee70fe7c81 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -18,6 +18,7 @@ #define ART_RUNTIME_ASM_SUPPORT_H_ #if defined(__cplusplus) +#include "lock_word.h" #include "mirror/art_method.h" #include "mirror/class.h" #include "mirror/string.h" @@ -156,6 +157,27 @@ ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32, ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64, art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value()) +#define LOCK_WORD_STATE_SHIFT 30 +ADD_TEST_EQ(LOCK_WORD_STATE_SHIFT, static_cast<int32_t>(art::LockWord::kStateShift)) + +#define LOCK_WORD_STATE_MASK 0xC0000000 +ADD_TEST_EQ(LOCK_WORD_STATE_MASK, static_cast<uint32_t>(art::LockWord::kStateMaskShifted)) + +#define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28 +ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_SHIFT, + static_cast<int32_t>(art::LockWord::kReadBarrierStateShift)) + +#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000 +ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK, + static_cast<int32_t>(art::LockWord::kReadBarrierStateMaskShifted)) + +#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xCFFFFFFF +ADD_TEST_EQ(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED, + static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)) + +#define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536 +ADD_TEST_EQ(LOCK_WORD_THIN_LOCK_COUNT_ONE, static_cast<int32_t>(art::LockWord::kThinLockCountOne)) + #if defined(__cplusplus) } // End of CheckAsmSupportOffsets. #endif diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 6e00cc79b5..13dcb8c634 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -57,6 +57,7 @@ Mutex* Locks::reference_queue_soft_references_lock_ = nullptr; Mutex* Locks::reference_queue_weak_references_lock_ = nullptr; Mutex* Locks::runtime_shutdown_lock_ = nullptr; Mutex* Locks::thread_list_lock_ = nullptr; +ConditionVariable* Locks::thread_exit_cond_ = nullptr; Mutex* Locks::thread_suspend_count_lock_ = nullptr; Mutex* Locks::trace_lock_ = nullptr; Mutex* Locks::unexpected_signal_lock_ = nullptr; @@ -1063,8 +1064,13 @@ void Locks::Init() { logging_lock_ = new Mutex("logging lock", current_lock_level, true); #undef UPDATE_CURRENT_LOCK_LEVEL + + InitConditions(); } } +void Locks::InitConditions() { + thread_exit_cond_ = new ConditionVariable("thread exit condition variable", *thread_list_lock_); +} } // namespace art diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 45d2347ee2..3b052c0615 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -487,7 +487,7 @@ class SCOPED_LOCKABLE WriterMutexLock { class Locks { public: static void Init(); - + static void InitConditions() NO_THREAD_SAFETY_ANALYSIS; // Condition variables. // Guards allocation entrypoint instrumenting. static Mutex* instrument_entrypoints_lock_; @@ -575,6 +575,9 @@ class Locks { // attaching and detaching. 
static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_); + // Signaled when threads terminate. Used to determine when all non-daemons have terminated. + static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_); + // Guards maintaining loading library data structures. static Mutex* jni_libraries_lock_ ACQUIRED_AFTER(thread_list_lock_); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 734c9351d5..057eed187f 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -484,14 +484,6 @@ inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { } } -inline void ConcurrentCopying::SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref) { - DCHECK(region_space_->IsInFromSpace(from_ref)); - DCHECK(region_space_->IsInToSpace(to_ref) || heap_->GetNonMovingSpace()->HasAddress(to_ref)); - LockWord lw = from_ref->GetLockWord(false); - DCHECK_NE(lw.GetState(), LockWord::kForwardingAddress); - from_ref->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref)), false); -} - // The following visitors are that used to verify that there's no // references to the from-space left after marking. class ConcurrentCopyingVerifyNoFromSpaceRefsVisitor { diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index d0e044639f..bbb551af23 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -230,8 +230,6 @@ class ConcurrentCopying : public GarbageCollector { bool IsOnAllocStack(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* GetFwdPtr(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void FlipThreadRoots() LOCKS_EXCLUDED(Locks::mutator_lock_);; void SwapStacks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void RecordLiveStackFreezeSize(Thread* self); diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 234bce55b6..d1ce0bc68a 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -91,7 +91,7 @@ void MarkCompact::ForwardObject(mirror::Object* obj) { const size_t alloc_size = RoundUp(obj->SizeOf(), space::BumpPointerSpace::kAlignment); LockWord lock_word = obj->GetLockWord(false); // If we have a non empty lock word, store it and restore it later. - if (lock_word.GetValue() != LockWord().GetValue()) { + if (!LockWord::IsDefault(lock_word)) { // Set the bit in the bitmap so that we know to restore it later. objects_with_lockword_->Set(obj); lock_words_to_restore_.push_back(lock_word); @@ -509,7 +509,7 @@ void MarkCompact::MoveObject(mirror::Object* obj, size_t len) { // Use memmove since there may be overlap. memmove(reinterpret_cast<void*>(dest_addr), reinterpret_cast<const void*>(obj), len); // Restore the saved lock word if needed. 
- LockWord lock_word; + LockWord lock_word = LockWord::Default(); if (UNLIKELY(objects_with_lockword_->Test(obj))) { lock_word = lock_words_to_restore_.front(); lock_words_to_restore_.pop_front(); diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h index c52578f8a2..d831bfbee2 100644 --- a/runtime/lock_word-inl.h +++ b/runtime/lock_word-inl.h @@ -24,17 +24,20 @@ namespace art { inline uint32_t LockWord::ThinLockOwner() const { DCHECK_EQ(GetState(), kThinLocked); + CheckReadBarrierState(); return (value_ >> kThinLockOwnerShift) & kThinLockOwnerMask; } inline uint32_t LockWord::ThinLockCount() const { DCHECK_EQ(GetState(), kThinLocked); + CheckReadBarrierState(); return (value_ >> kThinLockCountShift) & kThinLockCountMask; } inline Monitor* LockWord::FatLockMonitor() const { DCHECK_EQ(GetState(), kFatLocked); - MonitorId mon_id = value_ & ~(kStateMask << kStateShift); + CheckReadBarrierState(); + MonitorId mon_id = (value_ >> kMonitorIdShift) & kMonitorIdMask; return MonitorPool::MonitorFromMonitorId(mon_id); } @@ -47,14 +50,20 @@ inline LockWord::LockWord() : value_(0) { DCHECK_EQ(GetState(), kUnlocked); } -inline LockWord::LockWord(Monitor* mon) - : value_(mon->GetMonitorId() | (kStateFat << kStateShift)) { +inline LockWord::LockWord(Monitor* mon, uint32_t rb_state) + : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) | + (kStateFat << kStateShift)) { +#ifndef __LP64__ + DCHECK_ALIGNED(mon, kMonitorIdAlignment); +#endif DCHECK_EQ(FatLockMonitor(), mon); DCHECK_LE(mon->GetMonitorId(), static_cast<uint32_t>(kMaxMonitorId)); + CheckReadBarrierState(); } inline int32_t LockWord::GetHashCode() const { DCHECK_EQ(GetState(), kHashCode); + CheckReadBarrierState(); return (value_ >> kHashShift) & kHashMask; } diff --git a/runtime/lock_word.h b/runtime/lock_word.h index 2d5c71bb93..46c3bd4a99 100644 --- a/runtime/lock_word.h +++ b/runtime/lock_word.h @@ -21,6 +21,7 @@ #include <stdint.h> #include "base/logging.h" +#include "read_barrier.h" #include "utils.h" namespace art { @@ -31,34 +32,43 @@ namespace mirror { class Monitor; /* The lock value itself as stored in mirror::Object::monitor_. The two most significant bits of - * the state. The three possible states are fat locked, thin/unlocked, and hash code. - * When the lock word is in the "thin" state and its bits are formatted as follows: + * the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding + * address. 
When the lock word is in the "thin" state and its bits are formatted as follows: * - * |33|22222222221111|1111110000000000| - * |10|98765432109876|5432109876543210| - * |00| lock count |thread id owner | + * |33|22|222222221111|1111110000000000| + * |10|98|765432109876|5432109876543210| + * |00|rb| lock count |thread id owner | * * When the lock word is in the "fat" state and its bits are formatted as follows: * - * |33|222222222211111111110000000000| - * |10|987654321098765432109876543210| - * |01| MonitorId | + * |33|22|2222222211111111110000000000| + * |10|98|7654321098765432109876543210| + * |01|rb| MonitorId | * * When the lock word is in hash state and its bits are formatted as follows: * - * |33|222222222211111111110000000000| - * |10|987654321098765432109876543210| - * |10| HashCode | + * |33|22|2222222211111111110000000000| + * |10|98|7654321098765432109876543210| + * |10|rb| HashCode | + * + * When the lock word is in fowarding address state and its bits are formatted as follows: + * + * |33|22|2222222211111111110000000000| + * |10|98|7654321098765432109876543210| + * |11| ForwardingAddress | + * + * The rb bits store the read barrier state. */ class LockWord { public: enum SizeShiftsAndMasks { // private marker to avoid generate-operator-out.py from processing. // Number of bits to encode the state, currently just fat or thin/unlocked or hash code. kStateSize = 2, + kReadBarrierStateSize = 2, // Number of bits to encode the thin lock owner. kThinLockOwnerSize = 16, // Remaining bits are the recursive lock count. - kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize, + kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize, // Thin lock bits. Owner in lowest bits. kThinLockOwnerShift = 0, @@ -68,28 +78,41 @@ class LockWord { kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift, kThinLockCountMask = (1 << kThinLockCountSize) - 1, kThinLockMaxCount = kThinLockCountMask, + kThinLockCountOne = 1 << kThinLockCountShift, // == 65536 (0x10000) // State in the highest bits. - kStateShift = kThinLockCountSize + kThinLockCountShift, + kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift, kStateMask = (1 << kStateSize) - 1, + kStateMaskShifted = kStateMask << kStateShift, kStateThinOrUnlocked = 0, kStateFat = 1, kStateHash = 2, kStateForwardingAddress = 3, + kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift, + kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1, + kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift, + kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted, // When the state is kHashCode, the non-state bits hold the hashcode. 
kHashShift = 0, - kHashSize = 32 - kStateSize, + kHashSize = 32 - kStateSize - kReadBarrierStateSize, kHashMask = (1 << kHashSize) - 1, kMaxHash = kHashMask, + + kMonitorIdShift = kHashShift, + kMonitorIdSize = kHashSize, + kMonitorIdMask = kHashMask, + kMonitorIdAlignmentShift = 32 - kMonitorIdSize, + kMonitorIdAlignment = 1 << kMonitorIdAlignmentShift, kMaxMonitorId = kMaxHash }; - static LockWord FromThinLockId(uint32_t thread_id, uint32_t count) { + static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) { CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner)); CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount)); return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) | - (kStateThinOrUnlocked << kStateShift)); + (rb_state << kReadBarrierStateShift) | + (kStateThinOrUnlocked << kStateShift)); } static LockWord FromForwardingAddress(size_t target) { @@ -97,9 +120,23 @@ class LockWord { return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift)); } - static LockWord FromHashCode(uint32_t hash_code) { + static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) { CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash)); - return LockWord((hash_code << kHashShift) | (kStateHash << kStateShift)); + return LockWord((hash_code << kHashShift) | + (rb_state << kReadBarrierStateShift) | + (kStateHash << kStateShift)); + } + + static LockWord FromDefault(uint32_t rb_state) { + return LockWord(rb_state << kReadBarrierStateShift); + } + + static bool IsDefault(LockWord lw) { + return LockWord().GetValue() == lw.GetValue(); + } + + static LockWord Default() { + return LockWord(); } enum LockState { @@ -111,6 +148,7 @@ class LockWord { }; LockState GetState() const { + CheckReadBarrierState(); if (UNLIKELY(value_ == 0)) { return kUnlocked; } else { @@ -129,6 +167,10 @@ class LockWord { } } + uint32_t ReadBarrierState() const { + return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask; + } + // Return the owner thin lock thread id. uint32_t ThinLockOwner() const; @@ -141,25 +183,58 @@ class LockWord { // Return the forwarding address stored in the monitor. size_t ForwardingAddress() const; + // Constructor a lock word for inflation to use a Monitor. + explicit LockWord(Monitor* mon, uint32_t rb_state); + + // Return the hash code stored in the lock word, must be kHashCode state. + int32_t GetHashCode() const; + + template <bool kIncludeReadBarrierState> + static bool Equal(LockWord lw1, LockWord lw2) { + if (kIncludeReadBarrierState) { + return lw1.GetValue() == lw2.GetValue(); + } + return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState(); + } + + private: // Default constructor with no lock ownership. LockWord(); - // Constructor a lock word for inflation to use a Monitor. - explicit LockWord(Monitor* mon); - - bool operator==(const LockWord& rhs) const { - return GetValue() == rhs.GetValue(); + explicit LockWord(uint32_t val) : value_(val) { + CheckReadBarrierState(); } - // Return the hash code stored in the lock word, must be kHashCode state. - int32_t GetHashCode() const; + // Disallow this in favor of explicit Equal() with the + // kIncludeReadBarrierState param to make clients be aware of the + // read barrier state. 
+ bool operator==(const LockWord& rhs) = delete; + + void CheckReadBarrierState() const { + if (kIsDebugBuild && ((value_ >> kStateShift) & kStateMask) != kStateForwardingAddress) { + uint32_t rb_state = ReadBarrierState(); + if (!kUseReadBarrier) { + DCHECK_EQ(rb_state, 0U); + } else { + DCHECK(rb_state == ReadBarrier::white_ptr_ || + rb_state == ReadBarrier::gray_ptr_ || + rb_state == ReadBarrier::black_ptr_) << rb_state; + } + } + } + // Note GetValue() includes the read barrier bits and comparing (==) + // GetValue() between two lock words to compare the lock states may + // not work. Prefer Equal() or GetValueWithoutReadBarrierState(). uint32_t GetValue() const { + CheckReadBarrierState(); return value_; } - private: - explicit LockWord(uint32_t val) : value_(val) {} + uint32_t GetValueWithoutReadBarrierState() const { + CheckReadBarrierState(); + return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift); + } // Only Object should be converting LockWords to/from uints. friend class mirror::Object; diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index 9262a3e0b6..bbbdf98be8 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -159,7 +159,8 @@ int32_t Object::IdentityHashCode() const { case LockWord::kUnlocked: { // Try to compare and swap in a new hash, if we succeed we will return the hash on the next // loop iteration. - LockWord hash_word(LockWord::FromHashCode(GenerateIdentityHashCode())); + LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), + lw.ReadBarrierState()); DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode); if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) { return hash_word.GetHashCode(); diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 5ed8c7ded9..45a971d22d 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -165,7 +165,7 @@ bool Monitor::Install(Thread* self) { return false; } } - LockWord fat(this); + LockWord fat(this, lw.ReadBarrierState()); // Publish the updated lock word, which may race with other threads. bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat); // Lock profiling. @@ -610,15 +610,22 @@ bool Monitor::Deflate(Thread* self, mirror::Object* obj) { return false; } // Deflate to a thin lock. - obj->SetLockWord(LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_), false); + LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_, + lw.ReadBarrierState()); + // Assume no concurrent read barrier state changes as mutators are suspended. + obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / " << monitor->lock_count_; } else if (monitor->HasHashCode()) { - obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()), false); + LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState()); + // Assume no concurrent read barrier state changes as mutators are suspended. + obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode(); } else { // No lock and no hash, just put an empty lock word inside the object. - obj->SetLockWord(LockWord(), false); + LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState()); + // Assume no concurrent read barrier state changes as mutators are suspended. 
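IdentityHashCode(), Monitor::Install() and Monitor::Deflate() above all follow the same recipe: read the old word, carry its rb bits into the replacement, then publish. Roughly, in standalone form (std::atomic<uint32_t> stands in for Object::monitor_ and the Cas* helpers; constants assume the layout above):

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;
    constexpr uint32_t kReadBarrierStateMask = 0x3;
    constexpr uint32_t kStateShift = 30;
    constexpr uint32_t kStateHash = 2;

    std::atomic<uint32_t> monitor_word(0);  // stands in for mirror::Object::monitor_

    uint32_t ReadBarrierBits(uint32_t lw) {
      return (lw >> kReadBarrierStateShift) & kReadBarrierStateMask;
    }

    // Try to install an identity hash code while keeping whatever rb bits the
    // word already carries; callers loop and re-read on failure, as in
    // Object::IdentityHashCode() above. hash_code must fit in the low 28 bits.
    bool TryInstallHashCode(uint32_t hash_code) {
      uint32_t old_lw = monitor_word.load(std::memory_order_relaxed);
      uint32_t new_lw = hash_code |                                     // kHashShift is 0
                        (ReadBarrierBits(old_lw) << kReadBarrierStateShift) |
                        (kStateHash << kStateShift);
      return monitor_word.compare_exchange_weak(old_lw, new_lw, std::memory_order_relaxed);
    }

Deflate() is the one caller that can keep a plain SetLockWord(): as its comments say, mutators are suspended there, so no concurrent rb-bit change can race with the store.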
+ obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated" << obj << " to empty lock word"; } // The monitor is deflated, mark the object as nullptr so that we know to delete it during the @@ -704,7 +711,7 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { LockWord lock_word = h_obj->GetLockWord(true); switch (lock_word.GetState()) { case LockWord::kUnlocked: { - LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0)); + LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState())); if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) { // CasLockWord enforces more than the acquire ordering we need here. return h_obj.Get(); // Success! @@ -717,9 +724,18 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj) { // We own the lock, increase the recursion count. uint32_t new_count = lock_word.ThinLockCount() + 1; if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) { - LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count)); - h_obj->SetLockWord(thin_locked, true); - return h_obj.Get(); // Success! + LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count, + lock_word.ReadBarrierState())); + if (!kUseReadBarrier) { + h_obj->SetLockWord(thin_locked, true); + return h_obj.Get(); // Success! + } else { + // Use CAS to preserve the read barrier state. + if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) { + return h_obj.Get(); // Success! + } + } + continue; // Go again. } else { // We'd overflow the recursion count, so inflate the monitor. InflateThinLocked(self, h_obj, lock_word, 0); @@ -762,43 +778,57 @@ bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) { DCHECK(self != NULL); DCHECK(obj != NULL); obj = FakeUnlock(obj); - LockWord lock_word = obj->GetLockWord(true); StackHandleScope<1> hs(self); Handle<mirror::Object> h_obj(hs.NewHandle(obj)); - switch (lock_word.GetState()) { - case LockWord::kHashCode: - // Fall-through. - case LockWord::kUnlocked: - FailedUnlock(h_obj.Get(), self, nullptr, nullptr); - return false; // Failure. - case LockWord::kThinLocked: { - uint32_t thread_id = self->GetThreadId(); - uint32_t owner_thread_id = lock_word.ThinLockOwner(); - if (owner_thread_id != thread_id) { - // TODO: there's a race here with the owner dying while we unlock. - Thread* owner = - Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner()); - FailedUnlock(h_obj.Get(), self, owner, nullptr); + while (true) { + LockWord lock_word = obj->GetLockWord(true); + switch (lock_word.GetState()) { + case LockWord::kHashCode: + // Fall-through. + case LockWord::kUnlocked: + FailedUnlock(h_obj.Get(), self, nullptr, nullptr); return false; // Failure. - } else { - // We own the lock, decrease the recursion count. - if (lock_word.ThinLockCount() != 0) { - uint32_t new_count = lock_word.ThinLockCount() - 1; - LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count)); - h_obj->SetLockWord(thin_locked, true); + case LockWord::kThinLocked: { + uint32_t thread_id = self->GetThreadId(); + uint32_t owner_thread_id = lock_word.ThinLockOwner(); + if (owner_thread_id != thread_id) { + // TODO: there's a race here with the owner dying while we unlock. + Thread* owner = + Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner()); + FailedUnlock(h_obj.Get(), self, owner, nullptr); + return false; // Failure. 
} else { - h_obj->SetLockWord(LockWord(), true); + // We own the lock, decrease the recursion count. + LockWord new_lw = LockWord::Default(); + if (lock_word.ThinLockCount() != 0) { + uint32_t new_count = lock_word.ThinLockCount() - 1; + new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState()); + } else { + new_lw = LockWord::FromDefault(lock_word.ReadBarrierState()); + } + if (!kUseReadBarrier) { + DCHECK_EQ(new_lw.ReadBarrierState(), 0U); + h_obj->SetLockWord(new_lw, true); + // Success! + return true; + } else { + // Use CAS to preserve the read barrier state. + if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, new_lw)) { + // Success! + return true; + } + } + continue; // Go again. } - return true; // Success! } - } - case LockWord::kFatLocked: { - Monitor* mon = lock_word.FatLockMonitor(); - return mon->Unlock(self); - } - default: { - LOG(FATAL) << "Invalid monitor state " << lock_word.GetState(); - return false; + case LockWord::kFatLocked: { + Monitor* mon = lock_word.FatLockMonitor(); + return mon->Unlock(self); + } + default: { + LOG(FATAL) << "Invalid monitor state " << lock_word.GetState(); + return false; + } } } } diff --git a/runtime/monitor.h b/runtime/monitor.h index 0c5f8a43b1..95e4460f5d 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -19,6 +19,7 @@ #include <pthread.h> #include <stdint.h> +#include <stdlib.h> #include <iosfwd> #include <list> @@ -28,6 +29,7 @@ #include "base/allocator.h" #include "base/mutex.h" #include "gc_root.h" +#include "lock_word.h" #include "object_callbacks.h" #include "read_barrier_option.h" #include "thread_state.h" @@ -127,8 +129,20 @@ class Monitor { uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS; static bool Deflate(Thread* self, mirror::Object* obj) + // Not exclusive because ImageWriter calls this during a Heap::VisitObjects() that + // does not allow a thread suspension in the middle. TODO: maybe make this exclusive. SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); +#ifndef __LP64__ + void* operator new(size_t size) { + // Align Monitor* as per the monitor ID field size in the lock word. 
+ void* result; + int error = posix_memalign(&result, LockWord::kMonitorIdAlignment, size); + CHECK_EQ(error, 0) << strerror(error); + return result; + } +#endif + private: explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -160,7 +174,8 @@ class Monitor { const char* owner_filename, uint32_t owner_line_number) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static void FailedUnlock(mirror::Object* obj, Thread* expected_owner, Thread* found_owner, Monitor* mon) + static void FailedUnlock(mirror::Object* obj, Thread* expected_owner, Thread* found_owner, + Monitor* mon) LOCKS_EXCLUDED(Locks::thread_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h index 27678dcbdd..8ae5a54fe7 100644 --- a/runtime/monitor_pool.h +++ b/runtime/monitor_pool.h @@ -45,7 +45,9 @@ class MonitorPool { static Monitor* CreateMonitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { #ifndef __LP64__ - return new Monitor(self, owner, obj, hash_code); + Monitor* mon = new Monitor(self, owner, obj, hash_code); + DCHECK_ALIGNED(mon, LockWord::kMonitorIdAlignment); + return mon; #else return GetMonitorPool()->CreateMonitorInPool(self, owner, obj, hash_code); #endif @@ -71,7 +73,7 @@ class MonitorPool { static Monitor* MonitorFromMonitorId(MonitorId mon_id) { #ifndef __LP64__ - return reinterpret_cast<Monitor*>(mon_id << 3); + return reinterpret_cast<Monitor*>(mon_id << LockWord::kMonitorIdAlignmentShift); #else return GetMonitorPool()->LookupMonitor(mon_id); #endif @@ -79,7 +81,7 @@ class MonitorPool { static MonitorId MonitorIdFromMonitor(Monitor* mon) { #ifndef __LP64__ - return reinterpret_cast<MonitorId>(mon) >> 3; + return reinterpret_cast<MonitorId>(mon) >> LockWord::kMonitorIdAlignmentShift; #else return mon->GetMonitorId(); #endif diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h index 49efaa2bd7..a2c4c36a2f 100644 --- a/runtime/read_barrier_c.h +++ b/runtime/read_barrier_c.h @@ -35,6 +35,10 @@ #define USE_BAKER_OR_BROOKS_READ_BARRIER #endif +#if defined(USE_BAKER_READ_BARRIER) || defined(USE_BROOKS_READ_BARRIER) || defined(USE_TABLE_LOOKUP_READ_BARRIER) +#define USE_READ_BARRIER +#endif + #if defined(USE_BAKER_READ_BARRIER) && defined(USE_BROOKS_READ_BARRIER) #error "Only one of Baker or Brooks can be enabled at a time." #endif diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 83c5ffb135..d4c1e8c39c 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -51,10 +51,9 @@ static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000; static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000; ThreadList::ThreadList() - : suspend_all_count_(0), debug_suspend_all_count_(0), - thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_), + : suspend_all_count_(0), debug_suspend_all_count_(0), unregistering_count_(0), suspend_all_historam_("suspend all histogram", 16, 64) { - CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1))); + CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U))); } ThreadList::~ThreadList() { @@ -70,7 +69,6 @@ ThreadList::~ThreadList() { if (contains) { Runtime::Current()->DetachCurrentThread(); } - WaitForOtherNonDaemonThreadsToExit(); // TODO: there's an unaddressed race here where a thread may attach during shutdown, see // Thread::Init. 
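On 32-bit builds the monitor ID is just the Monitor* shifted right by kMonitorIdAlignmentShift (4), so the allocation above must guarantee 16-byte alignment for the ID to round-trip through the 28-bit field. A standalone sketch of that invariant, assuming 32-bit pointers (helper names are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    const size_t kMonitorIdAlignmentShift = 4;                          // 32 - kMonitorIdSize (28)
    const size_t kMonitorIdAlignment = 1u << kMonitorIdAlignmentShift;  // 16 bytes

    // Stand-in for the 32-bit (#ifndef __LP64__) Monitor allocation path.
    void* AllocateAlignedMonitor(size_t size) {
      void* result = nullptr;
      int error = posix_memalign(&result, kMonitorIdAlignment, size);
      if (error != 0) {
        fprintf(stderr, "posix_memalign failed: %s\n", strerror(error));
        abort();
      }
      return result;
    }

    // With 16-byte alignment and 32-bit pointers, the id always fits in the
    // 28-bit MonitorId field of a fat lock word and round-trips exactly.
    uint32_t MonitorIdFromPointer(void* mon) {
      return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(mon) >> kMonitorIdAlignmentShift);
    }

    void* PointerFromMonitorId(uint32_t mon_id) {
      return reinterpret_cast<void*>(static_cast<uintptr_t>(mon_id) << kMonitorIdAlignmentShift);
    }

The 64-bit build keeps its MonitorPool lookup instead, since a 64-bit pointer cannot be squeezed into the lock word at all.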
@@ -1002,27 +1000,32 @@ void ThreadList::UndoDebuggerSuspensions() { void ThreadList::WaitForOtherNonDaemonThreadsToExit() { Thread* self = Thread::Current(); Locks::mutator_lock_->AssertNotHeld(self); - bool all_threads_are_daemons; - do { + while (true) { { // No more threads can be born after we start to shutdown. MutexLock mu(self, *Locks::runtime_shutdown_lock_); CHECK(Runtime::Current()->IsShuttingDownLocked()); CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U); } - all_threads_are_daemons = true; MutexLock mu(self, *Locks::thread_list_lock_); - for (const auto& thread : list_) { - if (thread != self && !thread->IsDaemon()) { - all_threads_are_daemons = false; - break; + // Also wait for any threads that are unregistering to finish. This is required so that no + // threads access the thread list after it is deleted. TODO: This may not work for user daemon + // threads since they could unregister at the wrong time. + bool done = unregistering_count_ == 0; + if (done) { + for (const auto& thread : list_) { + if (thread != self && !thread->IsDaemon()) { + done = false; + break; + } } } - if (!all_threads_are_daemons) { - // Wait for another thread to exit before re-checking. - thread_exit_cond_.Wait(self); + if (done) { + break; } - } while (!all_threads_are_daemons); + // Wait for another thread to exit before re-checking. + Locks::thread_exit_cond_->Wait(self); + } } void ThreadList::SuspendAllDaemonThreads() { @@ -1092,42 +1095,45 @@ void ThreadList::Unregister(Thread* self) { VLOG(threads) << "ThreadList::Unregister() " << *self; + { + MutexLock mu(self, *Locks::thread_list_lock_); + ++unregistering_count_; + } + // Any time-consuming destruction, plus anything that can call back into managed code or - // suspend and so on, must happen at this point, and not in ~Thread. + // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy is what + // causes the threads to join. It is important to do this after incrementing unregistering_count_ + // since we want the runtime to wait for the daemon threads to exit before deleting the thread + // list. self->Destroy(); // If tracing, remember thread id and name before thread exits. Trace::StoreExitingThreadInfo(self); uint32_t thin_lock_id = self->GetThreadId(); - while (self != nullptr) { + while (true) { // Remove and delete the Thread* while holding the thread_list_lock_ and // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended. // Note: deliberately not using MutexLock that could hold a stale self pointer. - Locks::thread_list_lock_->ExclusiveLock(self); - bool removed = true; + MutexLock mu(self, *Locks::thread_list_lock_); if (!Contains(self)) { std::string thread_name; self->GetThreadName(thread_name); std::ostringstream os; DumpNativeStack(os, GetTid(), " native: ", nullptr); LOG(ERROR) << "Request to unregister unattached thread " << thread_name << "\n" << os.str(); + break; } else { - Locks::thread_suspend_count_lock_->ExclusiveLock(self); + MutexLock mu2(self, *Locks::thread_suspend_count_lock_); if (!self->IsSuspended()) { list_.remove(self); - } else { - // We failed to remove the thread due to a suspend request, loop and try again. - removed = false; + break; } - Locks::thread_suspend_count_lock_->ExclusiveUnlock(self); - } - Locks::thread_list_lock_->ExclusiveUnlock(self); - if (removed) { - delete self; - self = nullptr; } + // We failed to remove the thread due to a suspend request, loop and try again. 
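The shutdown rework above replaces the per-list thread_exit_cond_ with a global condition variable plus an unregistering_count_, so ~ThreadList also waits for threads that are part-way through Unregister(). The shape of that wait, re-expressed with standard-library primitives (a simplified sketch; the real code re-checks the shutdown invariants under runtime_shutdown_lock_ on each pass):

    #include <condition_variable>
    #include <list>
    #include <mutex>

    struct FakeThread { bool is_daemon; };

    std::mutex thread_list_lock;               // stand-in for Locks::thread_list_lock_
    std::condition_variable thread_exit_cond;  // stand-in for Locks::thread_exit_cond_
    std::list<FakeThread*> thread_list;
    int unregistering_count = 0;               // guarded by thread_list_lock

    // Mirrors WaitForOtherNonDaemonThreadsToExit(): block until every other
    // non-daemon thread has exited AND no thread is mid-Unregister(), so the
    // thread list can be torn down safely.
    void WaitForOtherNonDaemonThreadsToExit(FakeThread* self) {
      std::unique_lock<std::mutex> lk(thread_list_lock);
      thread_exit_cond.wait(lk, [self] {
        if (unregistering_count != 0) {
          return false;  // someone is still inside Unregister()
        }
        for (FakeThread* t : thread_list) {
          if (t != self && !t->is_daemon) {
            return false;
          }
        }
        return true;
      });
    }

    // Mirrors the end of Unregister(): drop the count and wake all waiters.
    void FinishUnregister() {
      {
        std::lock_guard<std::mutex> lk(thread_list_lock);
        --unregistering_count;
      }
      thread_exit_cond.notify_all();  // Broadcast() in the patch
    }

Incrementing unregistering_count_ before self->Destroy() is what makes the scheme work: the waiter cannot observe an empty list while a detaching thread still touches it.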
} + delete self; + // Release the thread ID after the thread is finished and deleted to avoid cases where we can // temporarily have multiple threads with the same thread id. When this occurs, it causes // problems in FindThreadByThreadId / SuspendThreadByThreadId. @@ -1138,8 +1144,9 @@ void ThreadList::Unregister(Thread* self) { CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, NULL), "detach self"); // Signal that a thread just detached. - MutexLock mu(NULL, *Locks::thread_list_lock_); - thread_exit_cond_.Signal(NULL); + MutexLock mu(nullptr, *Locks::thread_list_lock_); + --unregistering_count_; + Locks::thread_exit_cond_->Broadcast(nullptr); } void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) { diff --git a/runtime/thread_list.h b/runtime/thread_list.h index d18315aab4..de0dd7983c 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -177,8 +177,8 @@ class ThreadList { int suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_); int debug_suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_); - // Signaled when threads terminate. Used to determine when all non-daemons have terminated. - ConditionVariable thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_); + // Number of threads unregistering, ~ThreadList blocks until this hits 0. + int unregistering_count_ GUARDED_BY(Locks::thread_list_lock_); // Thread suspend time histogram. Only modified when all the threads are suspended, so guarding // by mutator lock ensures no thread can read when another thread is modifying it. diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc index b80fe22cd5..5db51c8e93 100644 --- a/runtime/transaction_test.cc +++ b/runtime/transaction_test.cc @@ -63,7 +63,7 @@ class TransactionTest : public CommonRuntimeTest { ASSERT_TRUE(h_klass->IsVerified()); mirror::Class::Status old_status = h_klass->GetStatus(); - uint32_t old_lock_word = h_klass->GetLockWord(false).GetValue(); + LockWord old_lock_word = h_klass->GetLockWord(false); Transaction transaction; Runtime::Current()->EnterTransactionMode(&transaction); @@ -75,8 +75,8 @@ class TransactionTest : public CommonRuntimeTest { ASSERT_TRUE(transaction.IsAborted()); // Check class's monitor get back to its original state without rolling back changes. - uint32_t new_lock_word = h_klass->GetLockWord(false).GetValue(); - EXPECT_EQ(old_lock_word, new_lock_word); + LockWord new_lock_word = h_klass->GetLockWord(false); + EXPECT_TRUE(LockWord::Equal<false>(old_lock_word, new_lock_word)); // Check class status is rolled back properly. soa.Self()->ClearException(); @@ -118,20 +118,20 @@ TEST_F(TransactionTest, Object_monitor) { // Lock object's monitor outside the transaction. h_obj->MonitorEnter(soa.Self()); - uint32_t old_lock_word = h_obj->GetLockWord(false).GetValue(); + LockWord old_lock_word = h_obj->GetLockWord(false); Transaction transaction; Runtime::Current()->EnterTransactionMode(&transaction); // Unlock object's monitor inside the transaction. h_obj->MonitorExit(soa.Self()); - uint32_t new_lock_word = h_obj->GetLockWord(false).GetValue(); + LockWord new_lock_word = h_obj->GetLockWord(false); Runtime::Current()->ExitTransactionMode(); // Rolling back transaction's changes must not change monitor's state. 
transaction.Rollback(); - uint32_t aborted_lock_word = h_obj->GetLockWord(false).GetValue(); - EXPECT_NE(old_lock_word, new_lock_word); - EXPECT_EQ(aborted_lock_word, new_lock_word); + LockWord aborted_lock_word = h_obj->GetLockWord(false); + EXPECT_FALSE(LockWord::Equal<false>(old_lock_word, new_lock_word)); + EXPECT_TRUE(LockWord::Equal<false>(aborted_lock_word, new_lock_word)); } // Tests array's length is preserved after transaction rollback. diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index ebd5b0e6a3..30aa870acb 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -400,7 +400,18 @@ public class Main { } - // TODO: bce on the array accesses in this method. + // CHECK-START: boolean Main.isPyramid(int[]) BCE (before) + // CHECK: BoundsCheck + // CHECK: ArrayGet + // CHECK: BoundsCheck + // CHECK: ArrayGet + + // CHECK-START: boolean Main.isPyramid(int[]) BCE (after) + // CHECK-NOT: BoundsCheck + // CHECK: ArrayGet + // CHECK-NOT: BoundsCheck + // CHECK: ArrayGet + static boolean isPyramid(int[] array) { int i = 0; int j = array.length - 1; diff --git a/test/455-checker-gvn/expected.txt b/test/455-checker-gvn/expected.txt new file mode 100644 index 0000000000..8351c19397 --- /dev/null +++ b/test/455-checker-gvn/expected.txt @@ -0,0 +1 @@ +14 diff --git a/test/455-checker-gvn/info.txt b/test/455-checker-gvn/info.txt new file mode 100644 index 0000000000..dfffd92935 --- /dev/null +++ b/test/455-checker-gvn/info.txt @@ -0,0 +1 @@ +Checker test for GVN. diff --git a/test/455-checker-gvn/src/Main.java b/test/455-checker-gvn/src/Main.java new file mode 100644 index 0000000000..e94fc46654 --- /dev/null +++ b/test/455-checker-gvn/src/Main.java @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + System.out.println(foo(3, 4)); + } + + // CHECK-START: int Main.foo(int, int) GVN (before) + // CHECK: Add + // CHECK: Add + // CHECK: Add + + // CHECK-START: int Main.foo(int, int) GVN (after) + // CHECK: Add + // CHECK: Add + // CHECK-NOT: Add + + public static int foo(int x, int y) { + int sum1 = x + y; + int sum2 = y + x; + return sum1 + sum2; + } + + public static long bar(int i) { + return i; + } +} |