61 files changed, 2596 insertions, 524 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 6967808b8e..09790fe8e5 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -155,6 +155,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/java_vm_ext_test.cc \ runtime/leb128_test.cc \ runtime/mem_map_test.cc \ + runtime/memory_region_test.cc \ runtime/mirror/dex_cache_test.cc \ runtime/mirror/object_test.cc \ runtime/monitor_pool_test.cc \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 78dd6cc29e..be6c41a834 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1860,6 +1860,12 @@ static void VerifyClass(const ParallelCompilationManager* manager, size_t class_ CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous()) << PrettyDescriptor(klass.Get()) << ": state=" << klass->GetStatus(); + + // It is *very* problematic if there are verification errors in the boot classpath. For example, + // we rely on things working OK without verification when the decryption dialog is brought up. + // So abort in a debug build if we find this violated. + DCHECK(!manager->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class " << + PrettyClass(klass.Get()) << " failed to fully verify."; } soa.Self()->AssertNoPendingException(); } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index ed3f949afe..7d256ae4aa 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -610,7 +610,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { for (size_t i = 0; i < environment_size; ++i) { HInstruction* current = environment->GetInstructionAt(i); if (current == nullptr) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kNone, 0); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); continue; } @@ -620,37 +620,43 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value)); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, + Low32Bits(value)); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, + High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value)); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, + Low32Bits(value)); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, + High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); } else { 
DCHECK(current->IsFloatConstant()); int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); break; } case Location::kDoubleStackSlot: { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++i; DCHECK_LT(i, environment_size); @@ -659,9 +665,9 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { case Location::kRegister : { int id = location.reg(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); if (current->GetType() == Primitive::kPrimLong) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); ++i; DCHECK_LT(i, environment_size); } @@ -670,9 +676,9 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { case Location::kFpuRegister : { int id = location.reg(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); if (current->GetType() == Primitive::kPrimDouble) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); ++i; DCHECK_LT(i, environment_size); } @@ -680,16 +686,20 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { } case Location::kFpuRegisterPair : { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, + location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, + location.high()); ++i; DCHECK_LT(i, environment_size); break; } case Location::kRegisterPair : { - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, + location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, + location.high()); ++i; DCHECK_LT(i, environment_size); break; diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index fca9933872..ec0cc3e98b 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -18,7 +18,28 @@ namespace art { +// This visitor tries to simplify operations that yield a constant. For example +// `input * 0` is replaced by a null constant. 
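// The comment just above introduces simplification via absorbing elements: when one
// operand forces the result regardless of the other (x * 0, x & 0, 0 << n for integral
// types), the whole operation collapses to a constant. A minimal standalone sketch of
// that idea, using an illustrative BinExpr type rather than ART's HGraph nodes:
#include <cstdint>
#include <optional>

enum class Op { kMul, kAnd, kShl };

struct BinExpr {
  Op op;
  std::optional<int64_t> lhs_const;  // set when the left operand is a known constant
  std::optional<int64_t> rhs_const;  // set when the right operand is a known constant
};

// Returns the folded constant when an absorbing input is present, nullopt otherwise.
std::optional<int64_t> SimplifyAbsorbing(const BinExpr& e) {
  switch (e.op) {
    case Op::kMul:  // x * 0 == 0 (integral types only; 0.0 * Inf would be NaN)
    case Op::kAnd:  // x & 0 == 0
      if (e.lhs_const == 0 || e.rhs_const == 0) return 0;
      break;
    case Op::kShl:  // 0 << n == 0
      if (e.lhs_const == 0) return 0;
      break;
  }
  return std::nullopt;
}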
+class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { + public: + explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {} + + private: + void VisitShift(HBinaryOperation* shift); + + void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitRem(HRem* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitSub(HSub* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; +}; + void HConstantFolding::Run() { + InstructionWithAbsorbingInputSimplifier simplifier(graph_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second @@ -38,6 +59,8 @@ void HConstantFolding::Run() { inst->AsBinaryOperation()->TryStaticEvaluation(); if (constant != nullptr) { inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + } else { + inst->Accept(&simplifier); } } else if (inst->IsUnaryOperation()) { // Constant folding: replace `op(a)' with a constant at compile @@ -47,9 +70,166 @@ void HConstantFolding::Run() { if (constant != nullptr) { inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); } + } else if (inst->IsDivZeroCheck()) { + // We can safely remove the check if the input is a non-null constant. + HDivZeroCheck* check = inst->AsDivZeroCheck(); + HInstruction* check_input = check->InputAt(0); + if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) { + check->ReplaceWith(check_input); + check->GetBlock()->RemoveInstruction(check); + } } } } } +void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); + HInstruction* left = instruction->GetLeft(); + if (left->IsConstant() && left->AsConstant()->IsZero()) { + // Replace code looking like + // SHL dst, 0, shift_amount + // with + // CONSTANT 0 + instruction->ReplaceWith(left); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // AND dst, src, 0 + // with + // CONSTANT 0 + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + Primitive::Type type = instruction->GetType(); + if (Primitive::IsIntOrLongType(type) && + (input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // MUL dst, src, 0 + // with + // CONSTANT 0 + // Integral multiplication by zero always yields zero, but floating-point + // multiplication by zero does not always do. For example `Infinity * 0.0` + // should yield a NaN. 
+ instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitOr(HOr* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + + if (input_cst == nullptr) { + return; + } + + if (Int64FromConstant(input_cst) == -1) { + // Replace code looking like + // OR dst, src, 0xFFF...FF + // with + // CONSTANT 0xFFF...FF + instruction->ReplaceWith(input_cst); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { + Primitive::Type type = instruction->GetType(); + + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + + if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsZero()) { + // Replace code looking like + // REM dst, 0, src + // with + // CONSTANT 0 + instruction->ReplaceWith(instruction->GetLeft()); + block->RemoveInstruction(instruction); + } + + HConstant* cst_right = instruction->GetRight()->AsConstant(); + if (((cst_right != nullptr) && + (cst_right->IsOne() || cst_right->IsMinusOne())) || + (instruction->GetLeft() == instruction->GetRight())) { + // Replace code looking like + // REM dst, src, 1 + // or + // REM dst, src, -1 + // or + // REM dst, src, src + // with + // CONSTANT 0 + ArenaAllocator* allocator = GetGraph()->GetArena(); + block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitShl(HShl* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitShr(HShr* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { + Primitive::Type type = instruction->GetType(); + + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + // We assume that GVN has run before, so we only perform a pointer + // comparison. If for some reason the values are equal but the pointers are + // different, we are still correct and only miss an optimisation + // opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // SUB dst, src, src + // with + // CONSTANT 0 + // Note that we cannot optimise `x - x` to `0` for floating-point. It does + // not work when `x` is an infinity. 
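// The two floating-point caveats in this file (`Infinity * 0.0` yields NaN, and `x - x`
// is not 0 when `x` is an infinity or NaN) are easy to verify outside the compiler.
// A standalone check, not part of the patch:
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  std::printf("inf * 0.0 = %f\n", inf * 0.0);                    // nan: x * 0 cannot be folded
  std::printf("inf - inf = %f\n", inf - inf);                    // nan: x - x cannot be folded
  std::printf("nan - nan = %f\n", std::nan("") - std::nan(""));  // nan as well
  return 0;
}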
+ block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitUShr(HUShr* instruction) { + VisitShift(instruction); +} + +void InstructionWithAbsorbingInputSimplifier::VisitXor(HXor* instruction) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // XOR dst, src, src + // with + // CONSTANT 0 + Primitive::Type type = instruction->GetType(); + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + block->ReplaceAndRemoveInstructionWith(instruction, + HConstant::NewConstant(allocator, type, 0)); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fd99070780..2ef19b92a1 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -27,6 +27,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { : HGraphVisitor(graph), stats_(stats) {} private: + void VisitShift(HBinaryOperation* shift); + void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; void VisitEqual(HEqual* equal) OVERRIDE; void VisitArraySet(HArraySet* equal) OVERRIDE; @@ -34,6 +36,16 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitNullCheck(HNullCheck* instruction) OVERRIDE; void VisitArrayLength(HArrayLength* instruction) OVERRIDE; void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + void VisitAdd(HAdd* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitDiv(HDiv* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitSub(HSub* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; @@ -43,6 +55,29 @@ void InstructionSimplifier::Run() { visitor.VisitInsertionOrder(); } +namespace { + +bool AreAllBitsSet(HConstant* constant) { + return Int64FromConstant(constant) == -1; +} + +} // namespace + +void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { HInstruction* obj = null_check->InputAt(0); if (!obj->CanBeNull()) { @@ -137,4 +172,234 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // ADD dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); 
+ HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { + // Replace code looking like + // AND dst, src, 0xFFF...FF + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + // We assume that GVN has run before, so we only perform a pointer comparison. + // If for some reason the values are equal but the pointers are different, we + // are still correct and only miss an optimisation opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // AND dst, src, src + // with + // src + instruction->ReplaceWith(instruction->GetLeft()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + Primitive::Type type = instruction->GetType(); + + if ((input_cst != nullptr) && input_cst->IsOne()) { + // Replace code looking like + // DIV dst, src, 1 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if ((input_cst != nullptr) && input_cst->IsMinusOne() && + (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + // Replace code looking like + // DIV dst, src, -1 + // with + // NEG dst, src + instruction->GetBlock()->ReplaceAndRemoveInstructionWith( + instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other))); + } +} + +void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + Primitive::Type type = instruction->GetType(); + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + if (input_cst == nullptr) { + return; + } + + if (input_cst->IsOne()) { + // Replace code looking like + // MUL dst, src, 1 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if (input_cst->IsMinusOne() && + (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + // Replace code looking like + // MUL dst, src, -1 + // with + // NEG dst, src + HNeg* neg = new (allocator) HNeg(type, input_other); + block->ReplaceAndRemoveInstructionWith(instruction, neg); + return; + } + + if (Primitive::IsFloatingPointType(type) && + ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->GetValue() == 2.0f) || + (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->GetValue() == 2.0))) { + // Replace code looking like + // FP_MUL dst, src, 2.0 + // with + // FP_ADD dst, src, src + // The 'int' and 'long' cases are handled below. + block->ReplaceAndRemoveInstructionWith(instruction, + new (allocator) HAdd(type, input_other, input_other)); + return; + } + + if (Primitive::IsIntOrLongType(type)) { + int64_t factor = Int64FromConstant(input_cst); + // We expect the `0` case to have been handled in the constant folding pass. 
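// The power-of-two branch below is classic strength reduction: MUL by 2^n is rewritten
// as SHL by n. A standalone equivalent of the test and rewrite (IsPow2 and Log2 are
// illustrative stand-ins for ART's IsPowerOfTwo()/WhichPowerOf2() helpers):
#include <cstdint>

static bool IsPow2(int64_t v) { return v > 0 && (v & (v - 1)) == 0; }
static int Log2(int64_t v) {
  int shift = 0;
  while ((int64_t{1} << shift) < v) ++shift;
  return shift;
}

// x * factor == x << log2(factor) whenever factor is a power of two.
int64_t MulByConstant(int64_t x, int64_t factor) {
  return IsPow2(factor) ? (x << Log2(factor)) : (x * factor);
}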
+ DCHECK_NE(factor, 0); + if (IsPowerOfTwo(factor)) { + // Replace code looking like + // MUL dst, src, pow_of_2 + // with + // SHL dst, src, log2(pow_of_2) + HIntConstant* shift = new (allocator) HIntConstant(WhichPowerOf2(factor)); + block->InsertInstructionBefore(shift, instruction); + HShl* shl = new(allocator) HShl(type, input_other, shift); + block->ReplaceAndRemoveInstructionWith(instruction, shl); + } + } +} + +void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // OR dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + // We assume that GVN has run before, so we only perform a pointer comparison. + // If for some reason the values are equal but the pointers are different, we + // are still correct and only miss an optimisation opportunity. + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // OR dst, src, src + // with + // src + instruction->ReplaceWith(instruction->GetLeft()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitShr(HShr* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // SUB dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntegralType(type)) { + return; + } + + HBasicBlock* block = instruction->GetBlock(); + ArenaAllocator* allocator = GetGraph()->GetArena(); + + if (instruction->GetLeft()->IsConstant()) { + int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant()); + if (left == 0) { + // Replace code looking like + // SUB dst, 0, src + // with + // NEG dst, src + // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When + // `x` is `0.0`, the former expression yields `0.0`, while the later + // yields `-0.0`. 
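// The signed-zero caveat above is specific to IEEE-754 floating-point; a quick
// standalone check (not part of the patch) makes the difference visible:
#include <cmath>
#include <cstdio>

int main() {
  const double x = 0.0;
  const double sub = 0.0 - x;  // +0.0
  const double neg = -x;       // -0.0
  // The two results compare equal, but their sign bits differ.
  std::printf("sub = %+f (signbit=%d), neg = %+f (signbit=%d), equal=%d\n",
              sub, std::signbit(sub), neg, std::signbit(neg), sub == neg);
  return 0;
}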
+ HNeg* neg = new (allocator) HNeg(type, instruction->GetRight()); + block->ReplaceAndRemoveInstructionWith(instruction, neg); + } + } +} + +void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) { + VisitShift(instruction); +} + +void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + HInstruction* input_other = instruction->GetLeastConstantLeft(); + + if ((input_cst != nullptr) && input_cst->IsZero()) { + // Replace code looking like + // XOR dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { + // Replace code looking like + // XOR dst, src, 0xFFF...FF + // with + // NOT dst, src + HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not); + return; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index e51bbc330a..a90ebced69 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -673,10 +673,43 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { return nullptr; } +HConstant* HBinaryOperation::GetConstantRight() const { + if (GetRight()->IsConstant()) { + return GetRight()->AsConstant(); + } else if (IsCommutative() && GetLeft()->IsConstant()) { + return GetLeft()->AsConstant(); + } else { + return nullptr; + } +} + +// If `GetConstantRight()` returns one of the input, this returns the other +// one. Otherwise it returns nullptr. +HInstruction* HBinaryOperation::GetLeastConstantLeft() const { + HInstruction* most_constant_right = GetConstantRight(); + if (most_constant_right == nullptr) { + return nullptr; + } else if (most_constant_right == GetLeft()) { + return GetRight(); + } else { + return GetLeft(); + } +} + bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { return this == if_->GetPreviousDisregardingMoves(); } +HConstant* HConstant::NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val) { + if (type == Primitive::kPrimInt) { + DCHECK(IsInt<32>(val)); + return new (allocator) HIntConstant(val); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + return new (allocator) HLongConstant(val); + } +} + bool HInstruction::Equals(HInstruction* other) const { if (!InstructionTypeEquals(other)) return false; DCHECK_EQ(GetKind(), other->GetKind()); @@ -907,7 +940,8 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } else { if (!returns_void) { // There will be multiple returns. - return_value = new (allocator) HPhi(allocator, kNoRegNumber, 0, invoke->GetType()); + return_value = new (allocator) HPhi( + allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke->GetType())); to->AddPhi(return_value->AsPhi()); } for (size_t i = 0, e = to->GetPredecessors().Size(); i < e; ++i) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index d4498a6d42..ec3d7438ab 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -133,8 +133,13 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // recognition. Returns whether it was successful in doing all these steps. bool TryBuildingSsa() { BuildDominatorTree(); + // The SSA builder requires loops to all be natural. 
Specifically, the dead phi + // elimination phase checks the consistency of the graph when doing a post-order + // visit for eliminating dead phis: a dead phi can only have loop header phi + // users remaining when being visited. + if (!AnalyzeNaturalLoops()) return false; TransformToSsa(); - return AnalyzeNaturalLoops(); + return true; } void BuildDominatorTree(); @@ -1569,6 +1574,14 @@ class HBinaryOperation : public HExpression<2> { virtual int32_t Evaluate(int32_t x, int32_t y) const = 0; virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; + // Returns an input that can legally be used as the right input and is + // constant, or nullptr. + HConstant* GetConstantRight() const; + + // If `GetConstantRight()` returns one of the input, this returns the other + // one. Otherwise it returns nullptr. + HInstruction* GetLeastConstantLeft() const; + DECLARE_INSTRUCTION(BinaryOperation); private: @@ -1840,6 +1853,12 @@ class HConstant : public HExpression<0> { bool CanBeMoved() const OVERRIDE { return true; } + virtual bool IsMinusOne() const { return false; } + virtual bool IsZero() const { return false; } + virtual bool IsOne() const { return false; } + + static HConstant* NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val); + DECLARE_INSTRUCTION(Constant); private: @@ -1859,6 +1878,16 @@ class HFloatConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { + return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f)); + } + bool IsZero() const OVERRIDE { + return AsFloatConstant()->GetValue() == 0.0f; + } + bool IsOne() const OVERRIDE { + return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f); + } + DECLARE_INSTRUCTION(FloatConstant); private: @@ -1880,6 +1909,16 @@ class HDoubleConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { + return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0)); + } + bool IsZero() const OVERRIDE { + return AsDoubleConstant()->GetValue() == 0.0; + } + bool IsOne() const OVERRIDE { + return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0); + } + DECLARE_INSTRUCTION(DoubleConstant); private: @@ -1925,6 +1964,10 @@ class HIntConstant : public HConstant { // method is an workaround until we fix the above. bool ActAsNullConstant() const OVERRIDE { return value_ == 0; } + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + DECLARE_INSTRUCTION(IntConstant); private: @@ -1945,6 +1988,10 @@ class HLongConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + DECLARE_INSTRUCTION(LongConstant); private: @@ -3473,6 +3520,12 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +inline int64_t Int64FromConstant(HConstant* constant) { + DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); + return constant->IsIntConstant() ? 
constant->AsIntConstant()->GetValue() + : constant->AsLongConstant()->GetValue(); +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5283d5dcca..79bebd2e64 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -56,11 +56,6 @@ class StackMapStream : public ValueObject { size_t inline_infos_start_index; }; - struct DexRegisterEntry { - DexRegisterMap::LocationKind kind; - int32_t value; - }; - struct InlineInfoEntry { uint32_t method_index; }; @@ -90,11 +85,11 @@ class StackMapStream : public ValueObject { } } - void AddDexRegisterEntry(DexRegisterMap::LocationKind kind, int32_t value) { - DexRegisterEntry entry; - entry.kind = kind; - entry.value = value; - dex_register_maps_.Add(entry); + void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { + // Ensure we only use non-compressed location kind at this stage. + DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) + << DexRegisterLocation::PrettyDescriptor(kind); + dex_register_maps_.Add(DexRegisterLocation(kind, value)); } void AddInlineInfoEntry(uint32_t method_index) { @@ -106,7 +101,7 @@ class StackMapStream : public ValueObject { size_t ComputeNeededSize() const { return CodeInfo::kFixedSize + ComputeStackMapSize() - + ComputeDexRegisterMapSize() + + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); } @@ -114,27 +109,44 @@ class StackMapStream : public ValueObject { return stack_maps_.Size() * StackMap::ComputeAlignedStackMapSize(stack_mask_max_); } - size_t ComputeDexRegisterMapSize() const { - // We currently encode all dex register information per stack map. - return stack_maps_.Size() * DexRegisterMap::kFixedSize - // For each dex register entry. - + (dex_register_maps_.Size() * DexRegisterMap::SingleEntrySize()); + // Compute the size of the Dex register map of `entry`. + size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + size_t size = DexRegisterMap::kFixedSize; + for (size_t j = 0; j < entry.num_dex_registers; ++j) { + DexRegisterLocation dex_register_location = + dex_register_maps_.Get(entry.dex_register_maps_start_index + j); + size += DexRegisterMap::EntrySize(dex_register_location); + } + return size; + } + + // Compute the size of all the Dex register maps. + size_t ComputeDexRegisterMapsSize() const { + size_t size = stack_maps_.Size() * DexRegisterMap::kFixedSize; + // The size of each register location depends on the type of + // the entry. + for (size_t i = 0, e = dex_register_maps_.Size(); i < e; ++i) { + DexRegisterLocation entry = dex_register_maps_.Get(i); + size += DexRegisterMap::EntrySize(entry); + } + return size; } + // Compute the size of all the inline information pieces. size_t ComputeInlineInfoSize() const { return inline_infos_.Size() * InlineInfo::SingleEntrySize() // For encoding the depth. 
+ (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } - size_t ComputeInlineInfoStart() const { - return ComputeDexRegisterMapStart() + ComputeDexRegisterMapSize(); - } - size_t ComputeDexRegisterMapStart() const { return CodeInfo::kFixedSize + ComputeStackMapSize(); } + size_t ComputeInlineInfoStart() const { + return ComputeDexRegisterMapStart() + ComputeDexRegisterMapsSize(); + } + void FillIn(MemoryRegion region) { CodeInfo code_info(region); code_info.SetOverallSize(region.size()); @@ -144,7 +156,7 @@ class StackMapStream : public ValueObject { MemoryRegion dex_register_maps_region = region.Subregion( ComputeDexRegisterMapStart(), - ComputeDexRegisterMapSize()); + ComputeDexRegisterMapsSize()); MemoryRegion inline_infos_region = region.Subregion( ComputeInlineInfoStart(), @@ -167,20 +179,25 @@ class StackMapStream : public ValueObject { } if (entry.num_dex_registers != 0) { - // Set the register map. - MemoryRegion register_region = dex_register_maps_region.Subregion( - next_dex_register_map_offset, - DexRegisterMap::kFixedSize - + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + // Set the Dex register map. + MemoryRegion register_region = + dex_register_maps_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(entry)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); + // Offset in `dex_register_map` where to store the next register entry. + size_t offset = DexRegisterMap::kFixedSize; for (size_t j = 0; j < entry.num_dex_registers; ++j) { - DexRegisterEntry register_entry = - dex_register_maps_.Get(j + entry.dex_register_maps_start_index); - dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + DexRegisterLocation dex_register_location = + dex_register_maps_.Get(entry.dex_register_maps_start_index + j); + dex_register_map.SetRegisterInfo(offset, dex_register_location); + offset += DexRegisterMap::EntrySize(dex_register_location); } + // Ensure we reached the end of the Dex registers region. 
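// With this change a Dex register entry no longer has a fixed size: EntrySize() depends
// on the location, and entries are packed back to back (hence the running `offset`
// above), so readers must walk entry by entry, as FindLocationOffset() does in the test
// below. A hedged sketch of that kind of variable-size packing, with an invented
// 1-or-5-byte encoding rather than ART's real DexRegisterLocation format:
#include <cstdint>
#include <vector>

struct Location { uint8_t kind; int32_t value; };  // illustrative only, assumes kind < 8

// Small non-negative values share a byte with the kind; anything else takes 1 + 4 bytes.
static size_t EntrySize(const Location& loc) {
  return (loc.value >= 0 && loc.value < 32) ? 1u : 5u;
}

std::vector<uint8_t> PackEntries(const std::vector<Location>& locs) {
  std::vector<uint8_t> out;
  for (const Location& loc : locs) {
    if (EntrySize(loc) == 1u) {
      out.push_back(static_cast<uint8_t>((loc.kind << 5) | loc.value));
    } else {
      out.push_back(loc.kind);  // marker byte for the large form
      for (int shift = 0; shift < 32; shift += 8) {
        out.push_back(static_cast<uint8_t>(loc.value >> shift));
      }
    }
  }
  return out;
}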
+ DCHECK_EQ(offset, register_region.size()); } else { stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); } @@ -208,7 +225,7 @@ class StackMapStream : public ValueObject { private: GrowableArray<StackMapEntry> stack_maps_; - GrowableArray<DexRegisterEntry> dex_register_maps_; + GrowableArray<DexRegisterLocation> dex_register_maps_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; size_t number_of_stack_maps_with_inline_info_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 5b025106ac..3a5f80686d 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -22,7 +22,7 @@ namespace art { -bool SameBits(MemoryRegion region, const BitVector& bit_vector) { +static bool SameBits(MemoryRegion region, const BitVector& bit_vector) { for (size_t i = 0; i < region.size_in_bits(); ++i) { if (region.LoadBit(i) != bit_vector.IsBitSet(i)) { return false; @@ -31,9 +31,9 @@ bool SameBits(MemoryRegion region, const BitVector& bit_vector) { return true; } -size_t ComputeDexRegisterMapSize(size_t number_of_dex_registers) { - return DexRegisterMap::kFixedSize - + number_of_dex_registers * DexRegisterMap::SingleEntrySize(); +static size_t ComputeDexRegisterMapSize(const DexRegisterMap& dex_registers, + size_t number_of_dex_registers) { + return dex_registers.FindLocationOffset(number_of_dex_registers); } TEST(StackMapTest, Test1) { @@ -44,8 +44,8 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, 0); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, -2); size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -67,14 +67,17 @@ TEST(StackMapTest, Test1) { ASSERT_TRUE(SameBits(stack_mask, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); - ASSERT_EQ(0, dex_registers.GetValue(0)); - ASSERT_EQ(-2, dex_registers.GetValue(1)); + DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(6u, dex_registers.Size()); + ASSERT_EQ(6u, ComputeDexRegisterMapSize(dex_registers, number_of_dex_registers)); + DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0); + DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); ASSERT_FALSE(stack_map.HasInlineInfo()); } @@ -89,8 +92,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, 
number_of_dex_registers, 2); - stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, 0); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, -2); stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -98,8 +101,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(DexRegisterMap::kInRegister, 18); - stream.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, 3); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, 18); + stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, 3); size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -111,54 +114,66 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); // First stack map. - StackMap stack_map = code_info.GetStackMapAt(0); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); - ASSERT_EQ(0u, stack_map.GetDexPc()); - ASSERT_EQ(64u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); - - MemoryRegion stack_mask = stack_map.GetStackMask(); - ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); - ASSERT_EQ(0, dex_registers.GetValue(0)); - ASSERT_EQ(-2, dex_registers.GetValue(1)); - - ASSERT_TRUE(stack_map.HasInlineInfo()); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map); - ASSERT_EQ(2u, inline_info.GetDepth()); - ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); - ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); + { + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_registers = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(6u, dex_registers.Size()); + ASSERT_EQ(6u, ComputeDexRegisterMapSize(dex_registers, number_of_dex_registers)); + DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0); + DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + + ASSERT_TRUE(stack_map.HasInlineInfo()); + InlineInfo inline_info = 
code_info.GetInlineInfoOf(stack_map); + ASSERT_EQ(2u, inline_info.GetDepth()); + ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); + ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); + } // Second stack map. - stack_map = code_info.GetStackMapAt(1); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); - ASSERT_EQ(1u, stack_map.GetDexPc()); - ASSERT_EQ(128u, stack_map.GetNativePcOffset()); - ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); - - stack_mask = stack_map.GetStackMask(); - ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap()); - dex_registers = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(16u, dex_registers.Size()); - ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers)); - ASSERT_EQ(DexRegisterMap::kInRegister, dex_registers.GetLocationKind(0)); - ASSERT_EQ(DexRegisterMap::kInFpuRegister, dex_registers.GetLocationKind(1)); - ASSERT_EQ(18, dex_registers.GetValue(0)); - ASSERT_EQ(3, dex_registers.GetValue(1)); - - ASSERT_FALSE(stack_map.HasInlineInfo()); + { + StackMap stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + ASSERT_EQ(128u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_registers = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_EQ(2u, dex_registers.Size()); + ASSERT_EQ(2u, ComputeDexRegisterMapSize(dex_registers, number_of_dex_registers)); + DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0); + DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1); + ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_EQ(18, location0.GetValue()); + ASSERT_EQ(3, location1.GetValue()); + + ASSERT_FALSE(stack_map.HasInlineInfo()); + } } } // namespace art diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index aab4f8bc0c..9ae3b79f62 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1039,6 +1039,33 @@ class OatDumper { } } + void DumpRegisterMapping(std::ostream& os, + size_t dex_register_num, + DexRegisterLocation::Kind kind, + int32_t value, + const std::string& prefix = "v", + const std::string& suffix = "") { + os << " " << prefix << dex_register_num << ": " + << DexRegisterLocation::PrettyDescriptor(kind) + << " (" << value << ")" << suffix << '\n'; + } + + void DumpStackMapHeader(std::ostream& os, const CodeInfo& code_info, size_t stack_map_num) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_num); + os << " StackMap " << stack_map_num + << std::hex + << " (dex_pc=0x" << stack_map.GetDexPc() + << ", native_pc_offset=0x" << stack_map.GetNativePcOffset() + << ", register_mask=0x" << stack_map.GetRegisterMask() + << std::dec + << ", stack_mask=0b"; + MemoryRegion stack_mask = stack_map.GetStackMask(); + for (size_t i = 0, e = 
stack_mask.size_in_bits(); i < e; ++i) { + os << stack_mask.LoadBit(e - i - 1); + } + os << ")\n"; + }; + // Display a CodeInfo object emitted by the optimizing compiler. void DumpCodeInfo(std::ostream& os, const CodeInfo& code_info, @@ -1049,27 +1076,21 @@ class OatDumper { os << " Optimized CodeInfo (size=" << code_info_size << ", number_of_dex_registers=" << number_of_dex_registers << ", number_of_stack_maps=" << number_of_stack_maps << ")\n"; + + // Display stack maps along with Dex register maps. for (size_t i = 0; i < number_of_stack_maps; ++i) { StackMap stack_map = code_info.GetStackMapAt(i); - // TODO: Display stack_mask value. - os << " StackMap " << i - << std::hex - << " (dex_pc=0x" << stack_map.GetDexPc() - << ", native_pc_offset=0x" << stack_map.GetNativePcOffset() - << ", register_mask=0x" << stack_map.GetRegisterMask() - << std::dec - << ")\n"; + DumpStackMapHeader(os, code_info, i); if (stack_map.HasDexRegisterMap()) { DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); for (size_t j = 0; j < number_of_dex_registers; ++j) { - os << " v" << j << ": " - << DexRegisterMap::PrettyDescriptor(dex_register_map.GetLocationKind(j)) - << " (" << dex_register_map.GetValue(j) << ")\n"; + DexRegisterLocation location = dex_register_map.GetLocationKindAndValue(j); + DumpRegisterMapping(os, j, location.GetInternalKind(), location.GetValue()); } } - // TODO: Display more information from code_info. } + // TODO: Dump the stack map's inline information. } // Display a vmap table. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 65c65e2b72..0f874a49e8 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1551,7 +1551,9 @@ DEFINE_FUNCTION art_quick_instrumentation_exit CFI_ADJUST_CFA_OFFSET(-8) POP rax // Restore integer result. - addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp // Drop save frame and fake return pc. + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + + addq LITERAL(8), %rsp // Drop fake return pc. jmp *%rdi // Return. END_FUNCTION art_quick_instrumentation_exit diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h index 93062a7c4b..893ab11bad 100644 --- a/runtime/check_reference_map_visitor.h +++ b/runtime/check_reference_map_visitor.h @@ -66,31 +66,36 @@ class CheckReferenceMapVisitor : public StackVisitor { mirror::ArtMethod* m = GetMethod(); CodeInfo code_info = m->GetOptimizedCodeInfo(); StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, m->GetCodeItem()->registers_size_); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, m->GetCodeItem()->registers_size_); MemoryRegion stack_mask = stack_map.GetStackMask(); uint32_t register_mask = stack_map.GetRegisterMask(); for (int i = 0; i < number_of_references; ++i) { int reg = registers[i]; CHECK(reg < m->GetCodeItem()->registers_size_); - DexRegisterMap::LocationKind location = dex_register_map.GetLocationKind(reg); - switch (location) { - case DexRegisterMap::kNone: + DexRegisterLocation location = dex_register_map.GetLocationKindAndValue(reg); + switch (location.GetKind()) { + case DexRegisterLocation::Kind::kNone: // Not set, should not be a reference. 
CHECK(false); break; - case DexRegisterMap::kInStack: - CHECK(stack_mask.LoadBit(dex_register_map.GetValue(reg) >> 2)); + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize)); break; - case DexRegisterMap::kInRegister: - CHECK_NE(register_mask & (1 << dex_register_map.GetValue(reg)), 0u); + case DexRegisterLocation::Kind::kInRegister: + CHECK_NE(register_mask & (1 << location.GetValue()), 0u); break; - case DexRegisterMap::kInFpuRegister: + case DexRegisterLocation::Kind::kInFpuRegister: // In Fpu register, should not be a reference. CHECK(false); break; - case DexRegisterMap::kConstant: - CHECK_EQ(dex_register_map.GetValue(reg), 0); + case DexRegisterLocation::Kind::kConstant: + CHECK_EQ(location.GetValue(), 0); break; + default: + LOG(FATAL) << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); } } } diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index 84865973c6..e0d62d7012 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -263,6 +263,8 @@ void CommonRuntimeTest::SetUp() { // pool is created by the runtime. runtime_->GetHeap()->CreateThreadPool(); runtime_->GetHeap()->VerifyHeap(); // Check for heap corruption before the test + // Reduce timinig-dependent flakiness in OOME behavior (eg StubTest.AllocObject). + runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U); // Get the boot class path from the runtime so it can be used in tests. boot_class_path_ = class_linker_->GetBootClassPath(); diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc index 87ce166147..77809358e4 100644 --- a/runtime/gc/accounting/mod_union_table_test.cc +++ b/runtime/gc/accounting/mod_union_table_test.cc @@ -48,9 +48,9 @@ class ModUnionTableTest : public CommonRuntimeTest { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { auto* klass = GetObjectArrayClass(self, space); const size_t size = ComputeArraySize(self, klass, component_count, 2); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* obj = down_cast<mirror::ObjectArray<mirror::Object>*>( - space->Alloc(self, size, &bytes_allocated, nullptr)); + space->Alloc(self, size, &bytes_allocated, nullptr, &bytes_tl_bulk_allocated)); if (obj != nullptr) { obj->SetClass(klass); obj->SetLength(static_cast<int32_t>(component_count)); @@ -77,9 +77,10 @@ class ModUnionTableTest : public CommonRuntimeTest { // copy of the class in the same space that we are allocating in. 
DCHECK(java_lang_object_array_ != nullptr); const size_t class_size = java_lang_object_array_->GetClassSize(); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* klass = down_cast<mirror::Class*>(space->Alloc(self, class_size, &bytes_allocated, - nullptr)); + nullptr, + &bytes_tl_bulk_allocated)); DCHECK(klass != nullptr); memcpy(klass, java_lang_object_array_, class_size); Runtime::Current()->GetHeap()->GetCardTable()->MarkCard(klass); diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index f6c9d3c144..bba92a1f40 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -28,15 +28,19 @@ inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() { } template<bool kThreadSafe> -inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) { +inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (UNLIKELY(size > kLargeSizeThreshold)) { - return AllocLargeObject(self, size, bytes_allocated); + return AllocLargeObject(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } void* m; if (kThreadSafe) { - m = AllocFromRun(self, size, bytes_allocated); + m = AllocFromRun(self, size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } else { - m = AllocFromRunThreadUnsafe(self, size, bytes_allocated); + m = AllocFromRunThreadUnsafe(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Check if the returned memory is really all zero. if (ShouldCheckZeroMemory() && m != nullptr) { @@ -48,6 +52,115 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by return m; } +inline bool RosAlloc::Run::IsFull() { + const size_t num_vec = NumberOfBitmapVectors(); + for (size_t v = 0; v < num_vec; ++v) { + if (~alloc_bit_map_[v] != 0) { + return false; + } + } + return true; +} + +inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return false; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + DCHECK_EQ(idx, SizeToIndex(size)); + DCHECK_EQ(bracket_size, IndexToBracketSize(idx)); + DCHECK_EQ(bracket_size, bracketSizes[idx]); + DCHECK_LE(size, bracket_size); + DCHECK(size > 512 || bracket_size - size < 16); + DCHECK_LT(idx, kNumThreadLocalSizeBrackets); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + return !thread_local_run->IsFull(); +} + +inline void* RosAlloc::AllocFromThreadLocalRun(Thread* self, size_t size, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return nullptr; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. 
+ MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + void* slot_addr = thread_local_run->AllocSlot(); + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + } + return slot_addr; +} + +inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return size; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + return numOfSlots[idx] * bracket_size; +} + +inline void* RosAlloc::Run::AllocSlot() { + const size_t idx = size_bracket_idx_; + while (true) { + if (kIsDebugBuild) { + // Make sure that no slots leaked, the bitmap should be full for all previous vectors. + for (size_t i = 0; i < first_search_vec_idx_; ++i) { + CHECK_EQ(~alloc_bit_map_[i], 0U); + } + } + uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; + uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); + if (LIKELY(ffz1 != 0)) { + const uint32_t ffz = ffz1 - 1; + const uint32_t slot_idx = ffz + + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; + const uint32_t mask = 1U << ffz; + DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; + // Found an empty slot. Set the bit. + DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); + *alloc_bitmap_ptr |= mask; + DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); + uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + + headerSizes[idx] + slot_idx * bracketSizes[idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) + << ", bracket_size=" << std::dec << bracketSizes[idx] + << ", slot_idx=" << slot_idx; + } + return slot_addr; + } + const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; + if (first_search_vec_idx_ + 1 >= num_words) { + DCHECK(IsFull()); + // Already at the last word, return null. + return nullptr; + } + // Increase the index to the next word and try again. 
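// AllocSlot() above picks a free slot by taking __builtin_ffs of the complement of the
// current 32-bit bitmap word and turning the resulting bit index into a slot address.
// The same idea over a plain word array, as a sketch independent of RosAlloc's Run
// layout (the caller is assumed to track num_words and num_slots):
#include <cstddef>
#include <cstdint>

// Returns the index of a newly allocated slot, or -1 when the run is full.
int AllocSlotFromBitmap(uint32_t* bitmap, size_t num_words, size_t num_slots) {
  for (size_t word = 0; word < num_words; ++word) {
    const int ffz1 = __builtin_ffs(static_cast<int>(~bitmap[word]));  // 1-based, 0 if no zero bit
    if (ffz1 != 0) {
      const uint32_t bit = static_cast<uint32_t>(ffz1 - 1);
      const size_t slot = word * 32 + bit;
      if (slot >= num_slots) {
        return -1;  // only padding bits of the last word are still zero
      }
      bitmap[word] |= (1u << bit);  // mark the slot as allocated
      return static_cast<int>(slot);
    }
  }
  return -1;
}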
+ ++first_search_vec_idx_; + } +} + } // namespace allocator } // namespace gc } // namespace art diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index f51093aa57..f64a4ff8df 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -454,7 +454,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) { return byte_size; } -void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); DCHECK_GT(size, kLargeSizeThreshold); size_t num_pages = RoundUp(size, kPageSize) / kPageSize; void* r; @@ -470,6 +473,8 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca } const size_t total_bytes = num_pages * kPageSize; *bytes_allocated = total_bytes; + *usable_size = total_bytes; + *bytes_tl_bulk_allocated = total_bytes; if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r) << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize) @@ -622,7 +627,12 @@ inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) { return slot_addr; } -void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -634,14 +644,19 @@ void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* byte Locks::mutator_lock_->AssertExclusiveHeld(self); void* slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (LIKELY(slot_addr != nullptr)) { - DCHECK(bytes_allocated != nullptr); *bytes_allocated = bracket_size; - // Caller verifies that it is all 0. + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; } + // Caller verifies that it is all 0. return slot_addr; } -void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -712,31 +727,43 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) self->SetRosAllocRun(idx, thread_local_run); DCHECK(!thread_local_run->IsFull()); } - DCHECK(thread_local_run != nullptr); DCHECK(!thread_local_run->IsFull()); DCHECK(thread_local_run->IsThreadLocal()); + // Account for all the free slots in the new or refreshed thread local run. + *bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size; slot_addr = thread_local_run->AllocSlot(); // Must succeed now with a new run. DCHECK(slot_addr != nullptr); + } else { + // The slot is already counted. Leave it as is. 
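// The bytes_tl_bulk_allocated out-parameter threaded through these allocation paths lets
// the heap account for a whole thread-local run when it is handed to a thread: the
// allocation that installs a fresh run reports all of the run's free bytes, and later
// allocations from the same run report zero because those bytes were already counted.
// A hedged sketch of that accounting policy, with illustrative types rather than
// RosAlloc's API:
#include <cstddef>

struct ThreadLocalRun {
  size_t free_slots;
  size_t bracket_size;
  bool freshly_assigned;  // true until the first allocation after the run is handed out
};

// Returns the slot size; *bytes_tl_bulk_allocated is what the caller adds to the heap's
// bytes-allocated counter for this request.
size_t AllocFromRunSketch(ThreadLocalRun* run, size_t* bytes_tl_bulk_allocated) {
  if (run->freshly_assigned) {
    *bytes_tl_bulk_allocated = run->free_slots * run->bracket_size;  // count the whole run
    run->freshly_assigned = false;
  } else {
    *bytes_tl_bulk_allocated = 0;  // already counted when the run was assigned
  }
  --run->free_slots;
  return run->bracket_size;
}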
+ *bytes_tl_bulk_allocated = 0; } + DCHECK(slot_addr != nullptr); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + *bytes_allocated = bracket_size; + *usable_size = bracket_size; } else { // Use the (shared) current run. MutexLock mu(self, *size_bracket_locks_[idx]); slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; + } } - DCHECK(bytes_allocated != nullptr); - *bytes_allocated = bracket_size; // Caller verifies that it is all 0. return slot_addr; } @@ -852,44 +879,6 @@ std::string RosAlloc::Run::Dump() { return stream.str(); } -inline void* RosAlloc::Run::AllocSlot() { - const size_t idx = size_bracket_idx_; - while (true) { - if (kIsDebugBuild) { - // Make sure that no slots leaked, the bitmap should be full for all previous vectors. - for (size_t i = 0; i < first_search_vec_idx_; ++i) { - CHECK_EQ(~alloc_bit_map_[i], 0U); - } - } - uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; - uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); - if (LIKELY(ffz1 != 0)) { - const uint32_t ffz = ffz1 - 1; - const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; - const uint32_t mask = 1U << ffz; - DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; - // Found an empty slot. Set the bit. - DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); - *alloc_bitmap_ptr |= mask; - DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); - uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; - } - return slot_addr; - } - const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; - if (first_search_vec_idx_ + 1 >= num_words) { - DCHECK(IsFull()); - // Already at the last word, return null. - return nullptr; - } - // Increase the index to the next word and try again. - ++first_search_vec_idx_; - } -} - void RosAlloc::Run::FreeSlot(void* ptr) { DCHECK(!IsThreadLocal()); const uint8_t idx = size_bracket_idx_; @@ -920,6 +909,25 @@ void RosAlloc::Run::FreeSlot(void* ptr) { } } +size_t RosAlloc::Run::NumberOfFreeSlots() { + size_t num_alloc_slots = 0; + const size_t idx = size_bracket_idx_; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_slots, 32) / 32; + DCHECK_NE(num_vec, 0U); + for (size_t v = 0; v < num_vec - 1; v++) { + num_alloc_slots += POPCOUNT(alloc_bit_map_[v]); + } + // Don't count the invalid bits in the last vector. 
+ uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] & + ~GetBitmapLastVectorMask(num_slots, num_vec); + num_alloc_slots += POPCOUNT(last_vec_masked); + size_t num_free_slots = num_slots - num_alloc_slots; + DCHECK_LE(num_alloc_slots, num_slots); + DCHECK_LE(num_free_slots, num_slots); + return num_free_slots; +} + inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) { DCHECK(IsThreadLocal()); // Free slots in the alloc bit map based on the thread local free bit map. @@ -1055,16 +1063,6 @@ inline bool RosAlloc::Run::IsAllFree() { return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec); } -inline bool RosAlloc::Run::IsFull() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; ++v) { - if (~alloc_bit_map_[v] != 0) { - return false; - } - } - return true; -} - inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { const size_t num_vec = NumberOfBitmapVectors(); for (size_t v = 0; v < num_vec; v++) { @@ -1654,10 +1652,11 @@ void RosAlloc::SetFootprintLimit(size_t new_capacity) { } } -void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { +size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). ReaderMutexLock wmu(self, bulk_free_lock_); + size_t free_bytes = 0U; for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); @@ -1665,9 +1664,12 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { // Invalid means already revoked. DCHECK(thread_local_run->IsThreadLocal()); if (thread_local_run != dedicated_full_run_) { + // Note the thread local run may not be full here. thread->SetRosAllocRun(idx, dedicated_full_run_); DCHECK_EQ(thread_local_run->magic_num_, kMagicNum); - // Note the thread local run may not be full here. + // Count the number of free slots left. + size_t num_free_slots = thread_local_run->NumberOfFreeSlots(); + free_bytes += num_free_slots * bracketSizes[idx]; bool dont_care; thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care); thread_local_run->SetIsThreadLocal(false); @@ -1677,6 +1679,7 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { RevokeRun(self, idx, thread_local_run); } } + return free_bytes; } void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) { @@ -1719,16 +1722,18 @@ void RosAlloc::RevokeThreadUnsafeCurrentRuns() { } } -void RosAlloc::RevokeAllThreadLocalRuns() { +size_t RosAlloc::RevokeAllThreadLocalRuns() { // This is called when a mutator thread won't allocate such as at // the Zygote creation time or during the GC pause. 
MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_); std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); + size_t free_bytes = 0U; for (Thread* thread : thread_list) { - RevokeThreadLocalRuns(thread); + free_bytes += RevokeThreadLocalRuns(thread); } RevokeThreadUnsafeCurrentRuns(); + return free_bytes; } void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 3269e102bc..d1e7ad91a0 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -230,8 +230,10 @@ class RosAlloc { static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec); // Returns true if all the slots in the run are not in use. bool IsAllFree(); + // Returns the number of free slots. + size_t NumberOfFreeSlots(); // Returns true if all the slots in the run are in use. - bool IsFull(); + ALWAYS_INLINE bool IsFull(); // Returns true if the bulk free bit map is clean. bool IsBulkFreeBitmapClean(); // Returns true if the thread local free bit map is clean. @@ -309,6 +311,15 @@ class RosAlloc { DCHECK(bracketSizes[idx] == size); return idx; } + // Returns true if the given allocation size is for a thread local allocation. + static bool IsSizeForThreadLocal(size_t size) { + DCHECK_GT(kNumThreadLocalSizeBrackets, 0U); + size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1; + bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx]; + DCHECK(size > kLargeSizeThreshold || + (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets))); + return is_size_for_thread_local; + } // Rounds up the size to the nearest bracket size. static size_t RoundToBracketSize(size_t size) { DCHECK(size <= kLargeSizeThreshold); @@ -504,11 +515,13 @@ class RosAlloc { size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_); // Allocate/free a run slot. - void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); // Allocate/free a run slot without acquiring locks. // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) - void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx); @@ -527,7 +540,9 @@ class RosAlloc { size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_); // Allocates large objects. - void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); + void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Revoke a run by adding it to non_full_runs_ or freeing the pages. void RevokeRun(Thread* self, size_t idx, Run* run); @@ -551,13 +566,26 @@ class RosAlloc { // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization. // If used, this may cause race conditions if multiple threads are allocating at the same time. 
template<bool kThreadSafe = true> - void* Alloc(Thread* self, size_t size, size_t* bytes_allocated) + void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); size_t Free(Thread* self, void* ptr) LOCKS_EXCLUDED(bulk_free_lock_); size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs) LOCKS_EXCLUDED(bulk_free_lock_); + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. + ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated); + + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_allocated_bulk out param returned by RosAlloc::Alloc(). + ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size); + // Returns the size of the allocated slot for a given allocated memory chunk. size_t UsableSize(const void* ptr); // Returns the size of the allocated slot for a given size. @@ -586,9 +614,13 @@ class RosAlloc { void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_); // Releases the thread-local runs assigned to the given thread back to the common set of runs. - void RevokeThreadLocalRuns(Thread* thread); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. + size_t RevokeThreadLocalRuns(Thread* thread); // Releases the thread-local runs assigned to all the threads back to the common set of runs. - void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. + size_t RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); // Assert the thread local runs of a thread are revoked. void AssertThreadLocalRunsAreRevoked(Thread* thread); // Assert all the thread local runs are revoked. 
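The RosAlloc and Heap hunks above change the accounting model for thread-local runs: when a thread acquires or refreshes a thread-local run, the bytes of all of its free slots are charged to Heap::num_bytes_allocated_ up front (the new bytes_tl_bulk_allocated out-parameter), later allocations served from that run report zero additional bulk bytes, and RevokeThreadLocalRuns() returns the bytes of the slots that were never used so Heap::RecordFreeRevoke() can subtract them again at the next GC. The standalone C++ sketch below illustrates that arithmetic; it is a simplified model rather than ART code, and SimpleRun, kSlotsPerRun, and kBracketSize are invented names (a single 32-bit bitmap word per run, GCC/Clang builtins assumed).

#include <cstddef>
#include <cstdint>
#include <cstdio>

namespace {

// Hypothetical constants for the sketch: one 32-bit bitmap word per run and a
// 16-byte bracket. Real RosAlloc runs have many bitmap words and many brackets.
constexpr uint32_t kSlotsPerRun = 32;
constexpr size_t kBracketSize = 16;

struct SimpleRun {
  uint32_t alloc_bit_map = 0;  // bit i set => slot i is allocated

  // Mirrors Run::NumberOfFreeSlots(): popcount the allocated bits and subtract
  // from the slot count.
  uint32_t NumberOfFreeSlots() const {
    return kSlotsPerRun - static_cast<uint32_t>(__builtin_popcount(alloc_bit_map));
  }

  // Mirrors the ffs(~bitmap) scan in Run::AllocSlot(); returns false when full.
  bool AllocSlot() {
    const int ffz1 = __builtin_ffs(static_cast<int>(~alloc_bit_map));
    if (ffz1 == 0) {
      return false;
    }
    alloc_bit_map |= 1u << (ffz1 - 1);
    return true;
  }
};

}  // namespace

int main() {
  SimpleRun run;
  size_t num_bytes_allocated = 0;     // stands in for Heap::num_bytes_allocated_
  size_t num_bytes_freed_revoke = 0;  // stands in for Heap::num_bytes_freed_revoke_

  // When the run becomes a thread-local run, every free slot is charged up
  // front; this is the value reported through bytes_tl_bulk_allocated.
  num_bytes_allocated += run.NumberOfFreeSlots() * kBracketSize;

  // Later allocations served from the same run report zero bulk bytes, so the
  // counter is not touched again.
  for (int i = 0; i < 5; ++i) {
    run.AllocSlot();
  }

  // On revoke, the slots that were never handed out are credited back...
  num_bytes_freed_revoke += run.NumberOfFreeSlots() * kBracketSize;

  // ...and RecordFreeRevoke()-style bookkeeping subtracts them at the next GC.
  num_bytes_allocated -= num_bytes_freed_revoke;
  num_bytes_freed_revoke = 0;

  // Prints 80: exactly the 5 slots * 16 bytes that were actually allocated.
  std::printf("accounted bytes after revoke: %zu\n", num_bytes_allocated);
  return 0;
}

After the revoke, the counter holds exactly the bytes of the slots that were handed out, which is why GrowForUtilization() adds GetFreedRevokeBytes() back when it reconstructs how many bytes were allocated during the GC.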
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index dd45ecab7f..db7a4ef7e7 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1259,8 +1259,9 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { size_t region_space_bytes_allocated = 0U; size_t non_moving_space_bytes_allocated = 0U; size_t bytes_allocated = 0U; + size_t dummy; mirror::Object* to_ref = region_space_->AllocNonvirtual<true>( - region_space_alloc_size, ®ion_space_bytes_allocated, nullptr); + region_space_alloc_size, ®ion_space_bytes_allocated, nullptr, &dummy); bytes_allocated = region_space_bytes_allocated; if (to_ref != nullptr) { DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated); @@ -1286,7 +1287,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { } fall_back_to_non_moving = true; to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size, - &non_moving_space_bytes_allocated, nullptr); + &non_moving_space_bytes_allocated, nullptr, &dummy); CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed"; bytes_allocated = non_moving_space_bytes_allocated; // Mark it in the mark bitmap. diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 8be18be676..eafcc45a13 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -48,6 +48,7 @@ void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) { gc_cause_ = gc_cause; freed_ = ObjectBytePair(); freed_los_ = ObjectBytePair(); + freed_bytes_revoke_ = 0; } uint64_t Iteration::GetEstimatedThroughput() const { diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index b8094694b0..ed5207a356 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -75,6 +75,12 @@ class Iteration { uint64_t GetFreedLargeObjects() const { return freed_los_.objects; } + uint64_t GetFreedRevokeBytes() const { + return freed_bytes_revoke_; + } + void SetFreedRevoke(uint64_t freed) { + freed_bytes_revoke_ = freed; + } void Reset(GcCause gc_cause, bool clear_soft_references); // Returns the estimated throughput of the iteration. uint64_t GetEstimatedThroughput() const; @@ -99,6 +105,7 @@ class Iteration { TimingLogger timings_; ObjectBytePair freed_; ObjectBytePair freed_los_; + uint64_t freed_bytes_revoke_; // see Heap::num_bytes_freed_revoke_. std::vector<uint64_t> pause_times_; friend class GarbageCollector; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 8aac484f7f..ee4e752608 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -292,6 +292,7 @@ void MarkSweep::ReclaimPhase() { Runtime::Current()->AllowNewSystemWeaks(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + GetHeap()->RecordFreeRevoke(); // Reclaim unmarked objects. Sweep(false); // Swap the live and mark bitmaps for each space which we modified space. This is an diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index c1ba5e3f72..b3d59f2a51 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -242,6 +242,7 @@ void SemiSpace::MarkingPhase() { // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. 
RevokeAllThreadLocalBuffers(); + GetHeap()->RecordFreeRevoke(); // this is for the non-moving rosalloc space used by GSS. // Record freed memory. const int64_t from_bytes = from_space_->GetBytesAllocated(); const int64_t to_bytes = bytes_moved_; @@ -489,17 +490,18 @@ static inline size_t CopyAvoidingDirtyingPages(void* dest, const void* src, size mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { const size_t object_size = obj->SizeOf(); - size_t bytes_allocated; + size_t bytes_allocated, dummy; mirror::Object* forward_address = nullptr; if (generational_ && reinterpret_cast<uint8_t*>(obj) < last_gc_to_space_end_) { // If it's allocated before the last GC (older), move // (pseudo-promote) it to the main free list space (as sort // of an old generation.) forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); if (UNLIKELY(forward_address == nullptr)) { // If out of space, fall back to the to-space. - forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); // No logic for marking the bitmap, so it must be null. DCHECK(to_space_live_bitmap_ == nullptr); } else { @@ -544,7 +546,8 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { } } else { // If it's allocated after the last GC (younger), copy it to the to-space. - forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } @@ -552,7 +555,7 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { // If it's still null, attempt to use the fallback space. if (UNLIKELY(forward_address == nullptr)) { forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space."; accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap(); if (bitmap != nullptr) { diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index b8c24521a2..b770096671 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -64,6 +64,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas // fragmentation. } AllocationTimer alloc_timer(this, &obj); + // bytes allocated for the (individual) object. 
size_t bytes_allocated; size_t usable_size; size_t new_num_bytes_allocated = 0; @@ -86,13 +87,29 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas usable_size = bytes_allocated; pre_fence_visitor(obj, usable_size); QuasiAtomic::ThreadFenceForConstructor(); + } else if (!kInstrumented && allocator == kAllocatorTypeRosAlloc && + (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) && + LIKELY(obj != nullptr)) { + DCHECK(!running_on_valgrind_); + obj->SetClass(klass); + if (kUseBakerOrBrooksReadBarrier) { + if (kUseBrooksReadBarrier) { + obj->SetReadBarrierPointer(obj); + } + obj->AssertReadBarrierPointer(); + } + usable_size = bytes_allocated; + pre_fence_visitor(obj, usable_size); + QuasiAtomic::ThreadFenceForConstructor(); } else { + // bytes allocated that takes bulk thread-local buffer allocations into account. + size_t bytes_tl_bulk_allocated = 0; obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); if (UNLIKELY(obj == nullptr)) { bool is_current_allocator = allocator == GetCurrentAllocator(); obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size, - &klass); + &bytes_tl_bulk_allocated, &klass); if (obj == nullptr) { bool after_is_current_allocator = allocator == GetCurrentAllocator(); // If there is a pending exception, fail the allocation right away since the next one @@ -126,9 +143,9 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas WriteBarrierField(obj, mirror::Object::ClassOffset(), klass); } pre_fence_visitor(obj, usable_size); - new_num_bytes_allocated = - static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) - + bytes_allocated; + new_num_bytes_allocated = static_cast<size_t>( + num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated)) + + bytes_tl_bulk_allocated; } if (kIsDebugBuild && Runtime::Current()->IsStarted()) { CHECK_LE(obj->SizeOf(), usable_size); @@ -196,8 +213,10 @@ inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klas template <const bool kInstrumented, const bool kGrow> inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB && + allocator_type != kAllocatorTypeRosAlloc && UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { return nullptr; } @@ -210,35 +229,56 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (LIKELY(ret != nullptr)) { *bytes_allocated = alloc_size; *usable_size = alloc_size; + *bytes_tl_bulk_allocated = alloc_size; } break; } case kAllocatorTypeRosAlloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = + rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + if (!kInstrumented) { + DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size)); + } + ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeDlMalloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. - ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeNonMoving: { - ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeLOS: { - ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Note that the bump pointer spaces aren't necessarily next to // the other continuous spaces like the non-moving alloc space or // the zygote space. @@ -257,20 +297,22 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) { return nullptr; } - *bytes_allocated = new_tlab_size; + *bytes_tl_bulk_allocated = new_tlab_size; } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; } // The allocation can't fail. ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } case kAllocatorTypeRegion: { DCHECK(region_space_ != nullptr); alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment); - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeRegionTLAB: { @@ -283,15 +325,17 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator // Try to allocate a tlab. if (!region_space_->AllocNewTlab(self)) { // Failed to allocate a tlab. Try non-tlab. 
- ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } - *bytes_allocated = space::RegionSpace::kRegionSize; + *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize; // Fall-through. } else { // Check OOME for a non-tlab allocation. if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { // Neither tlab or non-tlab works. Give up. @@ -301,18 +345,20 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator } else { // Large. Check OOME. if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { return nullptr; } } } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer. } // The allocation can't fail. ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 7534515a8a..9421db5139 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -156,6 +156,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max total_objects_freed_ever_(0), num_bytes_allocated_(0), native_bytes_allocated_(0), + num_bytes_freed_revoke_(0), verify_missing_card_marks_(false), verify_system_weaks_(false), verify_pre_gc_heap_(verify_pre_gc_heap), @@ -1344,6 +1345,19 @@ void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) { } } +void Heap::RecordFreeRevoke() { + // Subtract num_bytes_freed_revoke_ from num_bytes_allocated_ to cancel out the + // ahead-of-time bulk counting of bytes allocated in rosalloc thread-local buffers. + // If there's a concurrent revoke, it is OK not to reset num_bytes_freed_revoke_ + // all the way to exactly zero, as the remainder will be subtracted at the next GC. + size_t bytes_freed = num_bytes_freed_revoke_.LoadSequentiallyConsistent(); + CHECK_GE(num_bytes_freed_revoke_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_freed_revoke_ underflow"; + CHECK_GE(num_bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_allocated_ underflow"; + GetCurrentGcIteration()->SetFreedRevoke(bytes_freed); +} + space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const { for (const auto& space : continuous_spaces_) { if (space->AsContinuousSpace()->IsRosAllocSpace()) { @@ -1358,6 +1372,7 @@ space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t alloc_size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) { bool was_default_allocator = allocator == GetCurrentAllocator(); // Make sure there is no pending exception since we may need to throw an OOME. 
@@ -1377,7 +1392,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } // A GC was in progress and we blocked, retry allocation now that memory has been freed. mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1391,7 +1406,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } if (gc_ran) { mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1411,7 +1426,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (plan_gc_ran) { // Did we free sufficient memory for the allocation to succeed? mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1420,7 +1435,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat // Allocations have failed after GCs; this is an exceptional state. // Try harder, growing the heap if necessary. mirror::Object* ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1437,7 +1452,8 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (was_default_allocator && allocator != GetCurrentAllocator()) { return nullptr; } - ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size); + ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (ptr == nullptr) { const uint64_t current_time = NanoTime(); switch (allocator) { @@ -1453,7 +1469,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat case HomogeneousSpaceCompactResult::kSuccess: // If the allocation succeeded, we delayed an oom. ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { count_delayed_oom_++; } @@ -1498,7 +1514,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } else { LOG(WARNING) << "Disabled moving GC due to the non moving space being full"; ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); } } break; @@ -1984,8 +2000,8 @@ class ZygoteCompactingCollector FINAL : public collector::SemiSpace { if (it == bins_.end()) { // No available space in the bins, place it in the target space instead (grows the zygote // space). - size_t bytes_allocated; - forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr); + size_t bytes_allocated, dummy; + forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr, &dummy); if (to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } else { @@ -2048,8 +2064,6 @@ void Heap::PreZygoteFork() { non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); const bool same_space = non_moving_space_ == main_space_; if (kCompactZygote) { - // Can't compact if the non moving space is the same as the main space. 
- DCHECK(semi_space_collector_ != nullptr); // Temporarily disable rosalloc verification because the zygote // compaction will mess up the rosalloc internal metadata. ScopedDisableRosAllocVerification disable_rosalloc_verif(this); @@ -2068,6 +2082,8 @@ void Heap::PreZygoteFork() { } } else { CHECK(main_space_ != nullptr); + CHECK_NE(main_space_, non_moving_space_) + << "Does not make sense to compact within the same space"; // Copy from the main space. zygote_collector.SetFromSpace(main_space_); reset_main_space = true; @@ -3084,7 +3100,8 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, SetIdealFootprint(target_size); if (IsGcConcurrent()) { const uint64_t freed_bytes = current_gc_iteration_.GetFreedBytes() + - current_gc_iteration_.GetFreedLargeObjectBytes(); + current_gc_iteration_.GetFreedLargeObjectBytes() + + current_gc_iteration_.GetFreedRevokeBytes(); // Bytes allocated will shrink by freed_bytes after the GC runs, so if we want to figure out // how many bytes were allocated during the GC we need to add freed_bytes back on. CHECK_GE(bytes_allocated + freed_bytes, bytes_allocated_before_gc); @@ -3290,31 +3307,43 @@ void Heap::RequestTrim(Thread* self) { void Heap::RevokeThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(bump_pointer_space_->RevokeThreadLocalBuffers(thread), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(region_space_->RevokeThreadLocalBuffers(thread), 0U); } } void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } } void Heap::RevokeAllThreadLocalBuffers() { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeAllThreadLocalBuffers(); + size_t freed_bytes_revoke = rosalloc_space_->RevokeAllThreadLocalBuffers(); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeAllThreadLocalBuffers(); + CHECK_EQ(bump_pointer_space_->RevokeAllThreadLocalBuffers(), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeAllThreadLocalBuffers(); + CHECK_EQ(region_space_->RevokeAllThreadLocalBuffers(), 0U); } } @@ -3355,6 +3384,8 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { // Just finished a GC, attempt to run finalizers. RunFinalization(env); CHECK(!env->ExceptionCheck()); + // Native bytes allocated may be updated by finalization, refresh it. + new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed(); } // If we still are over the watermark, attempt a GC for alloc and run finalizers. 
if (new_native_bytes_allocated > growth_limit_) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index d41e17fb75..959ff18516 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -390,6 +390,9 @@ class Heap { // free-list backed space. void RecordFree(uint64_t freed_objects, int64_t freed_bytes); + // Record the bytes freed by thread-local buffer revoke. + void RecordFreeRevoke(); + // Must be called if a field of an Object in the heap changes, and before any GC safe-point. // The call is not needed if NULL is stored in the field. ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/, @@ -664,6 +667,11 @@ class Heap { // Whether or not we may use a garbage collector, used so that we only create collectors we need. bool MayUseCollector(CollectorType type) const; + // Used by tests to reduce timing-dependent flakiness in OOME behavior. + void SetMinIntervalHomogeneousSpaceCompactionByOom(uint64_t interval) { + min_interval_homogeneous_space_compaction_by_oom_ = interval; + } + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -724,6 +732,7 @@ class Heap { // an initial allocation attempt failed. mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -742,7 +751,8 @@ class Heap { template <const bool kInstrumented, const bool kGrow> ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) @@ -998,6 +1008,13 @@ class Heap { // Bytes which are allocated and managed by native code but still need to be accounted for. Atomic<size_t> native_bytes_allocated_; + // Number of bytes freed by thread local buffer revokes. This will + // cancel out the ahead-of-time bulk counting of bytes allocated in + // rosalloc thread-local buffers. It is temporarily accumulated + // here to be subtracted from num_bytes_allocated_ later at the next + // GC. + Atomic<size_t> num_bytes_freed_revoke_; + // Info related to the current or previous GC iteration. 
collector::Iteration current_gc_iteration_; diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 9f1f9533d0..14a93d1611 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -24,7 +24,8 @@ namespace gc { namespace space { inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); mirror::Object* ret = AllocNonvirtual(num_bytes); if (LIKELY(ret != nullptr)) { @@ -32,13 +33,15 @@ inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; } return ret; } inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); num_bytes = RoundUp(num_bytes, kAlignment); uint8_t* end = end_.LoadRelaxed(); @@ -54,6 +57,7 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t if (UNLIKELY(usable_size != nullptr)) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return obj; } diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index fbfc4495e0..1303d7729e 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -93,12 +93,13 @@ mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); } -void BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), block_lock_); RevokeThreadLocalBuffersLocked(thread); + return 0U; } -void BumpPointerSpace::RevokeAllThreadLocalBuffers() { +size_t BumpPointerSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -107,6 +108,7 @@ void BumpPointerSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void BumpPointerSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index 089ede4453..c496a422e0 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -47,10 +47,10 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. 
mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(size_t num_bytes); @@ -103,9 +103,9 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { void Dump(std::ostream& os) const; - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h index 4c8a35e0f7..9eace897e6 100644 --- a/runtime/gc/space/dlmalloc_space-inl.h +++ b/runtime/gc/space/dlmalloc_space-inl.h @@ -27,11 +27,13 @@ namespace space { inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* obj; { MutexLock mu(self, lock_); - obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != NULL)) { // Zero freshly allocated memory, done while not holding the space's lock. @@ -49,9 +51,11 @@ inline size_t DlMallocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ return size + kChunkOverhead; } -inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes, - size_t* bytes_allocated, - size_t* usable_size) { +inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked( + Thread* /*self*/, size_t num_bytes, + size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { @@ -61,6 +65,7 @@ inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t allocation_size = AllocationSizeNonvirtual(result, usable_size); DCHECK(bytes_allocated != NULL); *bytes_allocated = allocation_size; + *bytes_tl_bulk_allocated = allocation_size; } return result; } diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index b8a9dd6639..225861db60 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -123,7 +123,8 @@ void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t ini } mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -131,7 +132,8 @@ mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); mspace_set_footprint_limit(mspace_, max_allowed); // Try the allocation. 
- result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. size_t footprint = mspace_footprint(mspace_); mspace_set_footprint_limit(mspace_, footprint); diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h index 6ce138c235..1f80f1fd6b 100644 --- a/runtime/gc/space/dlmalloc_space.h +++ b/runtime/gc/space/dlmalloc_space.h @@ -48,11 +48,15 @@ class DlMallocSpace : public MallocSpace { // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_) { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_) { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Virtual to allow ValgrindMallocSpace to intercept. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { @@ -67,15 +71,22 @@ class DlMallocSpace : public MallocSpace { LOCKS_EXCLUDED(lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return num_bytes; + } + // DlMallocSpaces don't have thread local state. - void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } // Faster non-virtual allocation path. mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Faster non-virtual allocation size path. 
size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size); @@ -134,7 +145,8 @@ class DlMallocSpace : public MallocSpace { private: mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(lock_); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index 7523de58bf..5c8e4b9299 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -38,10 +38,11 @@ class ValgrindLargeObjectMapSpace FINAL : public LargeObjectMapSpace { } virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE { mirror::Object* obj = LargeObjectMapSpace::Alloc(self, num_bytes + kValgrindRedZoneBytes * 2, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); mirror::Object* object_without_rdz = reinterpret_cast<mirror::Object*>( reinterpret_cast<uintptr_t>(obj) + kValgrindRedZoneBytes); VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<void*>(obj), kValgrindRedZoneBytes); @@ -108,7 +109,8 @@ LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) { } mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { std::string error_msg; MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", nullptr, num_bytes, PROT_READ | PROT_WRITE, true, false, &error_msg); @@ -131,6 +133,8 @@ mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; num_bytes_allocated_ += allocation_size; total_bytes_allocated_ += allocation_size; ++num_objects_allocated_; @@ -413,7 +417,7 @@ size_t FreeListSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) { } mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { MutexLock mu(self, lock_); const size_t allocation_size = RoundUp(num_bytes, kAlignment); AllocationInfo temp_info; @@ -451,6 +455,8 @@ mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* byt if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; // Need to do these inside of the lock. ++num_objects_allocated_; ++total_objects_allocated_; diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h index 847f575815..d1f9386d09 100644 --- a/runtime/gc/space/large_object_space.h +++ b/runtime/gc/space/large_object_space.h @@ -62,9 +62,11 @@ class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace { } size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // LargeObjectSpaces don't have thread local state. 
- void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } bool IsAllocSpace() const OVERRIDE { return true; @@ -124,7 +126,7 @@ class LargeObjectMapSpace : public LargeObjectSpace { // Return the storage space required by obj. size_t AllocationSize(mirror::Object* obj, size_t* usable_size); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); size_t Free(Thread* self, mirror::Object* ptr); void Walk(DlMallocSpace::WalkCallback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); // TODO: disabling thread safety analysis as this may be called when we already hold lock_. @@ -153,7 +155,7 @@ class FreeListSpace FINAL : public LargeObjectSpace { size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t Free(Thread* self, mirror::Object* obj) OVERRIDE; void Walk(DlMallocSpace::WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); void Dump(std::ostream& os) const; diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc index e17bad8a14..a261663ec7 100644 --- a/runtime/gc/space/large_object_space_test.cc +++ b/runtime/gc/space/large_object_space_test.cc @@ -49,11 +49,13 @@ void LargeObjectSpaceTest::LargeObjectTest() { while (requests.size() < num_allocations) { size_t request_size = test_rand(&rand_seed) % max_allocation_size; size_t allocation_size = 0; + size_t bytes_tl_bulk_allocated; mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size, - nullptr); + nullptr, &bytes_tl_bulk_allocated); ASSERT_TRUE(obj != nullptr); ASSERT_EQ(allocation_size, los->AllocationSize(obj, nullptr)); ASSERT_GE(allocation_size, request_size); + ASSERT_EQ(allocation_size, bytes_tl_bulk_allocated); // Fill in our magic value. uint8_t magic = (request_size & 0xFF) | 1; memset(obj, magic, request_size); @@ -83,9 +85,10 @@ void LargeObjectSpaceTest::LargeObjectTest() { // Test that dump doesn't crash. los->Dump(LOG(INFO)); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; // Checks that the coalescing works. 
- mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr); + mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated); EXPECT_TRUE(obj != nullptr); los->Free(Thread::Current(), obj); @@ -102,8 +105,9 @@ class AllocRaceTask : public Task { void Run(Thread* self) { for (size_t i = 0; i < iterations_ ; ++i) { - size_t alloc_size; - mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr); + size_t alloc_size, bytes_tl_bulk_allocated; + mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr, + &bytes_tl_bulk_allocated); NanoSleep((id_ + 3) * 1000); // (3+id) mu s diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h index 06239e5e73..bbf1bbbdbd 100644 --- a/runtime/gc/space/malloc_space.h +++ b/runtime/gc/space/malloc_space.h @@ -55,10 +55,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { // Allocate num_bytes allowing the underlying space to grow. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) = 0; // Allocate num_bytes without allowing the underlying space to grow. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Return the storage space required by obj. If usable_size isn't nullptr then it is set to the // amount of the storage space that may be used by obj. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0; @@ -67,6 +68,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_allocated_bulk out param returned by MallocSpace::Alloc(). + virtual size_t MaxBytesBulkAllocatedFor(size_t num_bytes) = 0; + #ifndef NDEBUG virtual void CheckMoreCoreForPrecondition() {} // to be overridden in the debug build. 
#else diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index a4ed7187c0..1cdf69dbe5 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -24,30 +24,36 @@ namespace gc { namespace space { inline mirror::Object* RegionSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); - return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size); + return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } inline mirror::Object* RegionSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); - return Alloc(self, num_bytes, bytes_allocated, usable_size); + return Alloc(self, num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } template<bool kForEvac> inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); mirror::Object* obj; if (LIKELY(num_bytes <= kRegionSize)) { // Non-large object. if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(evac_region_ != nullptr); - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -55,9 +61,11 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by MutexLock mu(Thread::Current(), region_lock_); // Retry with current region since another thread may have updated it. if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -73,7 +81,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by r->Unfree(time_); r->SetNewlyAllocated(); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); current_region_ = r; return obj; @@ -85,7 +93,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by if (r->IsFree()) { r->Unfree(time_); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); evac_region_ = r; return obj; @@ -94,7 +102,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } } else { // Large object. 
- obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size); + obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (LIKELY(obj != nullptr)) { return obj; } @@ -103,7 +112,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAllocated() && IsInToSpace()); DCHECK(IsAligned<kAlignment>(num_bytes)); Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_); @@ -124,6 +134,7 @@ inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* byte if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(old_top); } @@ -253,7 +264,8 @@ inline mirror::Object* RegionSpace::GetNextObject(mirror::Object* obj) { template<bool kForEvac> mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); DCHECK_GT(num_bytes, kRegionSize); size_t num_regs = RoundUp(num_bytes, kRegionSize) / kRegionSize; @@ -300,6 +312,7 @@ mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocate if (usable_size != nullptr) { *usable_size = num_regs * kRegionSize; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(first_reg->Begin()); } else { // right points to the non-free region. Start with the one after it. diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 8bb73d614c..814ab6ce92 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -76,7 +76,7 @@ RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map) current_region_ = &full_region_; evac_region_ = nullptr; size_t ignored; - DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr) == nullptr); + DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr, &ignored) == nullptr); } size_t RegionSpace::FromSpaceSize() { @@ -356,9 +356,10 @@ bool RegionSpace::AllocNewTlab(Thread* self) { return false; } -void RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), region_lock_); RevokeThreadLocalBuffersLocked(thread); + return 0U; } void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { @@ -377,7 +378,7 @@ void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { thread->SetTlab(nullptr, nullptr); } -void RegionSpace::RevokeAllThreadLocalBuffers() { +size_t RegionSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -385,6 +386,7 @@ void RegionSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void RegionSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index 416054716c..b88ce24114 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -42,18 +42,20 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. 
mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); // The main allocation routine. template<bool kForEvac> ALWAYS_INLINE mirror::Object* AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); // Allocate/free large objects (objects that are larger than the region size.) template<bool kForEvac> - mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size); + mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated); void FreeLarge(mirror::Object* large_obj, size_t bytes_allocated); // Return the storage space required by obj. @@ -87,10 +89,10 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void DumpRegions(std::ostream& os); void DumpNonFreeRegions(std::ostream& os); - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(region_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(region_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); @@ -269,7 +271,8 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { } ALWAYS_INLINE mirror::Object* Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); bool IsFree() const { bool is_free = state_ == RegionState::kRegionStateFree; diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h index 5d6642d349..9d582a3f86 100644 --- a/runtime/gc/space/rosalloc_space-inl.h +++ b/runtime/gc/space/rosalloc_space-inl.h @@ -26,13 +26,19 @@ namespace art { namespace gc { namespace space { +template<bool kMaybeRunningOnValgrind> inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) { // obj is a valid object. Use its class in the header to get the size. // Don't use verification since the object may be dead if we are sweeping. 
size_t size = obj->SizeOf<kVerifyNone>(); - bool running_on_valgrind = RUNNING_ON_VALGRIND != 0; - if (running_on_valgrind) { - size += 2 * kDefaultValgrindRedZoneBytes; + bool running_on_valgrind = false; + if (kMaybeRunningOnValgrind) { + running_on_valgrind = RUNNING_ON_VALGRIND != 0; + if (running_on_valgrind) { + size += 2 * kDefaultValgrindRedZoneBytes; + } + } else { + DCHECK_EQ(RUNNING_ON_VALGRIND, 0U); } size_t size_by_size = rosalloc_->UsableSize(size); if (kIsDebugBuild) { @@ -55,28 +61,50 @@ inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ template<bool kThreadSafe> inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { - size_t rosalloc_size = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + size_t rosalloc_bytes_allocated = 0; + size_t rosalloc_usable_size = 0; + size_t rosalloc_bytes_tl_bulk_allocated = 0; if (!kThreadSafe) { Locks::mutator_lock_->AssertExclusiveHeld(self); } mirror::Object* result = reinterpret_cast<mirror::Object*>( - rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_size)); + rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_bytes_allocated, + &rosalloc_usable_size, + &rosalloc_bytes_tl_bulk_allocated)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) << ") not in bounds of allocation space " << *this; } DCHECK(bytes_allocated != NULL); - *bytes_allocated = rosalloc_size; - DCHECK_EQ(rosalloc_size, rosalloc_->UsableSize(result)); + *bytes_allocated = rosalloc_bytes_allocated; + DCHECK_EQ(rosalloc_usable_size, rosalloc_->UsableSize(result)); if (usable_size != nullptr) { - *usable_size = rosalloc_size; + *usable_size = rosalloc_usable_size; } + DCHECK(bytes_tl_bulk_allocated != NULL); + *bytes_tl_bulk_allocated = rosalloc_bytes_tl_bulk_allocated; } return result; } +inline bool RosAllocSpace::CanAllocThreadLocal(Thread* self, size_t num_bytes) { + return rosalloc_->CanAllocFromThreadLocalRun(self, num_bytes); +} + +inline mirror::Object* RosAllocSpace::AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + return reinterpret_cast<mirror::Object*>( + rosalloc_->AllocFromThreadLocalRun(self, num_bytes, bytes_allocated)); +} + +inline size_t RosAllocSpace::MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes) { + return rosalloc_->MaxBytesBulkAllocatedFor(num_bytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index ced25a40bb..f140021f76 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -154,7 +154,8 @@ allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_ } mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -162,7 +163,8 @@ mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); rosalloc_->SetFootprintLimit(max_allowed); // Try the allocation. 
- result = AllocCommon(self, num_bytes, bytes_allocated, usable_size); + result = AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. size_t footprint = rosalloc_->Footprint(); rosalloc_->SetFootprintLimit(footprint); @@ -209,7 +211,7 @@ size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** p __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + kPrefetchLookAhead])); } if (kVerifyFreedBytes) { - verify_bytes += AllocationSizeNonvirtual(ptrs[i], nullptr); + verify_bytes += AllocationSizeNonvirtual<true>(ptrs[i], nullptr); } } @@ -338,12 +340,12 @@ void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, } } -void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { - rosalloc_->RevokeThreadLocalRuns(thread); +size_t RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { + return rosalloc_->RevokeThreadLocalRuns(thread); } -void RosAllocSpace::RevokeAllThreadLocalBuffers() { - rosalloc_->RevokeAllThreadLocalRuns(); +size_t RosAllocSpace::RevokeAllThreadLocalBuffers() { + return rosalloc_->RevokeAllThreadLocalRuns(); } void RosAllocSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h index c856e9560a..36268f76f8 100644 --- a/runtime/gc/space/rosalloc_space.h +++ b/runtime/gc/space/rosalloc_space.h @@ -47,18 +47,21 @@ class RosAllocSpace : public MallocSpace { bool low_memory_mode, bool can_move_objects); mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size); + return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { - return AllocationSizeNonvirtual(obj, usable_size); + return AllocationSizeNonvirtual<true>(obj, usable_size); } size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -66,17 +69,33 @@ class RosAllocSpace : public MallocSpace { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. 
- return AllocCommon(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocNonvirtualThreadUnsafe(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. Pass in false for thread unsafe. - return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocThreadLocal(Thread* self, size_t num_bytes); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. + ALWAYS_INLINE mirror::Object* AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return MaxBytesBulkAllocatedForNonvirtual(num_bytes); + } + ALWAYS_INLINE size_t MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes); + // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held. + template<bool kMaybeRunningOnValgrind> size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) NO_THREAD_SAFETY_ANALYSIS; @@ -99,8 +118,8 @@ class RosAllocSpace : public MallocSpace { uint64_t GetBytesAllocated() OVERRIDE; uint64_t GetObjectsAllocated() OVERRIDE; - void RevokeThreadLocalBuffers(Thread* thread); - void RevokeAllThreadLocalBuffers(); + size_t RevokeThreadLocalBuffers(Thread* thread); + size_t RevokeAllThreadLocalBuffers(); void AssertThreadLocalBuffersAreRevoked(Thread* thread); void AssertAllThreadLocalBuffersAreRevoked(); @@ -134,7 +153,7 @@ class RosAllocSpace : public MallocSpace { private: template<bool kThreadSafe = true> mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, size_t maximum_size, bool low_memory_mode) OVERRIDE { diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index d24650b60d..f2378d9ff0 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -203,14 +203,24 @@ class AllocSpace { // succeeds, the output parameter bytes_allocated will be set to the // actually allocated bytes which is >= num_bytes. // Alloc can be called from multiple threads at the same time and must be thread-safe. + // + // bytes_tl_bulk_allocated - bytes allocated in bulk ahead of time for a thread local allocation, + // if applicable. It can be + // 1) equal to bytes_allocated if it's not a thread local allocation, + // 2) greater than bytes_allocated if it's a thread local + // allocation that required a new buffer, or + // 3) zero if it's a thread local allocation in an existing + // buffer. + // This is what is to be added to Heap::num_bytes_allocated_. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. 
virtual mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return Alloc(self, num_bytes, bytes_allocated, usable_size); + return Alloc(self, num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } // Return the storage space required by obj. @@ -224,11 +234,15 @@ class AllocSpace { // Revoke any sort of thread-local buffers that are used to speed up allocations for the given // thread, if the alloc space implementation uses any. - virtual void RevokeThreadLocalBuffers(Thread* thread) = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. + virtual size_t RevokeThreadLocalBuffers(Thread* thread) = 0; // Revoke any sort of thread-local buffers that are used to speed up allocations for all the // threads, if the alloc space implementation uses any. - virtual void RevokeAllThreadLocalBuffers() = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. + virtual size_t RevokeAllThreadLocalBuffers() = 0; virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0; diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h index 09d10dd94b..3e9e9f7a49 100644 --- a/runtime/gc/space/space_test.h +++ b/runtime/gc/space/space_test.h @@ -61,11 +61,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -73,11 +75,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -182,34 +186,38 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { ScopedObjectAccess soa(self); // Succeeds, fits without adjusting the footprint limit. 
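The three cases documented above for bytes_tl_bulk_allocated, together with the new size_t return values of the Revoke* methods, spell out one invariant: the global byte counter is charged with bulk sizes when buffers are handed out and credited when they are revoked. A hedged sketch of that bookkeeping follows; the counter and function names are illustrative, not taken from this patch:

    #include <atomic>
    #include <cstddef>

    // Charge the counter after a successful Alloc(). Per the AllocSpace::Alloc()
    // comment, bytes_tl_bulk_allocated is either equal to bytes_allocated (no
    // thread-local buffer involved), larger (a new buffer was claimed in bulk),
    // or zero (served from a buffer that was already charged).
    inline void ChargeAllocation(std::atomic<size_t>* num_bytes_allocated,
                                 size_t bytes_tl_bulk_allocated) {
      if (bytes_tl_bulk_allocated > 0u) {
        num_bytes_allocated->fetch_add(bytes_tl_bulk_allocated,
                                       std::memory_order_relaxed);
      }
    }

    // Credit back the free bytes reported by RevokeThreadLocalBuffers() /
    // RevokeAllThreadLocalBuffers(), since they were charged in bulk up front.
    inline void UnchargeRevokedBytes(std::atomic<size_t>* num_bytes_allocated,
                                     size_t revoked_free_bytes) {
      if (revoked_free_bytes > 0u) {
        num_bytes_allocated->fetch_sub(revoked_free_bytes,
                                       std::memory_order_relaxed);
      }
    }
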
- size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. - mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -219,13 +227,15 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -233,7 +243,7 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(1U * MB, free1); // Make sure that the zygote space isn't directly at the start of the space. - EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr) != nullptr); + EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr); gc::Heap* heap = Runtime::Current()->GetHeap(); space::Space* old_space = space; @@ -250,22 +260,26 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { AddSpace(space, false); // Succeeds, fits without adjusting the footprint limit. 
- ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size)); + ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size)); + ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(2U * MB, ptr3_bytes_allocated); EXPECT_LE(2U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); space->Free(self, ptr3.Assign(nullptr)); // Final clean up. @@ -285,34 +299,38 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { AddSpace(space); // Succeeds, fits without adjusting the footprint limit. - size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. - mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -322,13 +340,15 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. 
- size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -348,14 +368,16 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits without adjusting the max allowed footprint. mirror::Object* lots_of_objects[1024]; for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray(); lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. @@ -363,12 +385,15 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits by adjusting the max allowed footprint. for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; - lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size); + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; + lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size, + &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. 
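The test expectation bytes_tl_bulk_allocated == 0 || bytes_tl_bulk_allocated >= allocation_size matches the thread-local fast path that the new RosAllocSpace helpers (CanAllocThreadLocal() and AllocThreadLocal(), declared earlier in this patch) make possible: a hit in an existing run reports zero, everything else reports at least the allocation size. A hedged sketch of such a call site; this caller is not part of the patch, the include path is assumed, and the usable_size handling is an assumption:

    #include "gc/space/rosalloc_space.h"  // Include path assumed.

    namespace art {

    // Hypothetical fast-path wrapper around the new RosAllocSpace helpers.
    mirror::Object* TryThreadLocalFirst(gc::space::RosAllocSpace* space,
                                        Thread* self, size_t num_bytes,
                                        size_t* bytes_allocated,
                                        size_t* usable_size,
                                        size_t* bytes_tl_bulk_allocated) {
      if (space->CanAllocThreadLocal(self, num_bytes)) {
        mirror::Object* obj =
            space->AllocThreadLocal(self, num_bytes, bytes_allocated);
        if (obj != nullptr) {
          // Served from an already-charged thread-local run, so nothing new is
          // added to the global counter.
          *bytes_tl_bulk_allocated = 0u;
          if (usable_size != nullptr) {
            *usable_size = *bytes_allocated;  // Assumed: equals the run's bracket size.
          }
          return obj;
        }
      }
      // Fall back to the regular path, which fills in the bulk size itself.
      return space->Alloc(self, num_bytes, bytes_allocated, usable_size,
                          bytes_tl_bulk_allocated);
    }

    }  // namespace art
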
@@ -425,10 +450,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t StackHandleScope<1> hs(soa.Self()); auto object(hs.NewHandle<mirror::Object>(nullptr)); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { - object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } footprint = space->GetFootprint(); EXPECT_GE(space->Size(), footprint); // invariant @@ -441,6 +469,8 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t } else { EXPECT_GE(allocation_size, 8u); } + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); amount_allocated += allocation_size; break; } @@ -518,11 +548,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t auto large_object(hs.NewHandle<mirror::Object>(nullptr)); size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr)); + large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { large_object.Assign(AllocWithGrowth(space, self, three_quarters_space, &bytes_allocated, - nullptr)); + nullptr, &bytes_tl_bulk_allocated)); } EXPECT_TRUE(large_object.Get() != nullptr); diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h index ae8e892e29..bc329e129c 100644 --- a/runtime/gc/space/valgrind_malloc_space-inl.h +++ b/runtime/gc/space/valgrind_malloc_space-inl.h @@ -32,10 +32,15 @@ namespace valgrind_details { template <size_t kValgrindRedZoneBytes, bool kUseObjSizeForUsable> inline mirror::Object* AdjustForValgrind(void* obj_with_rdz, size_t num_bytes, size_t bytes_allocated, size_t usable_size, - size_t* bytes_allocated_out, size_t* usable_size_out) { + size_t bytes_tl_bulk_allocated, + size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { if (bytes_allocated_out != nullptr) { *bytes_allocated_out = bytes_allocated; } + if (bytes_tl_bulk_allocated_out != nullptr) { + *bytes_tl_bulk_allocated_out = bytes_tl_bulk_allocated; + } // This cuts over-provision and is a trade-off between testing the over-provisioning code paths // vs checking overflows in the regular paths. 
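The Valgrind wrapper whose AdjustForValgrind() helper appears above pads every request with a red zone on both sides before forwarding it to the underlying space; the Alloc overrides that follow add 2 * kValgrindRedZoneBytes to num_bytes, and the new MaxBytesBulkAllocatedFor() override applies the same padding. A small standalone sketch of that size adjustment; the red-zone width here is an assumed value for illustration, ART uses its own kDefaultValgrindRedZoneBytes constant:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kRedZoneBytes = 8;  // Assumed width, for illustration only.

    // Layout handed to the underlying space for a request of num_bytes:
    //   [ red zone | payload (num_bytes) | red zone ]
    // so every forwarded size is padded by two red zones.
    constexpr size_t PaddedAllocationSize(size_t num_bytes) {
      return num_bytes + 2 * kRedZoneBytes;
    }

    static_assert(PaddedAllocationSize(16) == 32,
                  "16-byte payload plus two 8-byte red zones");

    // Hypothetical illustration: the usable payload starts after the leading
    // red zone of the block returned by the underlying space.
    inline void* PayloadFromRedZonedBlock(void* obj_with_rdz) {
      return reinterpret_cast<uint8_t*>(obj_with_rdz) + kRedZoneBytes;
    }
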
@@ -82,20 +87,25 @@ ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocWithGrowth( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -106,11 +116,13 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::Alloc( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } @@ -118,8 +130,10 @@ mirror::Object* ValgrindMallocSpace<S, return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>(obj_with_rdz, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated, bytes_allocated_out, - usable_size_out); + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -130,20 +144,25 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocThreadUnsafe( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocThreadUnsafe(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -226,6 +245,17 @@ ValgrindMallocSpace<S, mem_map->Size() - initial_size); } +template <typename S, + size_t kValgrindRedZoneBytes, + bool kAdjustForRedzoneInAllocSize, + bool kUseObjSizeForUsable> +size_t ValgrindMallocSpace<S, + kValgrindRedZoneBytes, + 
kAdjustForRedzoneInAllocSize, + kUseObjSizeForUsable>::MaxBytesBulkAllocatedFor(size_t num_bytes) { + return S::MaxBytesBulkAllocatedFor(num_bytes + 2 * kValgrindRedZoneBytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h index 707ea69a20..a6b010a2a1 100644 --- a/runtime/gc/space/valgrind_malloc_space.h +++ b/runtime/gc/space/valgrind_malloc_space.h @@ -34,12 +34,13 @@ template <typename BaseMallocSpaceType, class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { public: mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE; mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE - EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -53,6 +54,8 @@ class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { UNUSED(ptr); } + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE; + template <typename... Params> explicit ValgrindMallocSpace(MemMap* mem_map, size_t initial_size, Params... params); virtual ~ValgrindMallocSpace() {} diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc index a868e6831d..9e882a898e 100644 --- a/runtime/gc/space/zygote_space.cc +++ b/runtime/gc/space/zygote_space.cc @@ -77,7 +77,7 @@ void ZygoteSpace::Dump(std::ostream& os) const { << ",name=\"" << GetName() << "\"]"; } -mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*) { +mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*, size_t*) { UNIMPLEMENTED(FATAL); UNREACHABLE(); } diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h index 0cf4bb139c..934a234345 100644 --- a/runtime/gc/space/zygote_space.h +++ b/runtime/gc/space/zygote_space.h @@ -46,7 +46,7 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { } mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -55,9 +55,11 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // ZygoteSpaces don't have thread local state. 
- void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } uint64_t GetBytesAllocated() { diff --git a/runtime/memory_region.h b/runtime/memory_region.h index b3820be26c..939a1a9212 100644 --- a/runtime/memory_region.h +++ b/runtime/memory_region.h @@ -23,6 +23,7 @@ #include "base/macros.h" #include "base/value_object.h" #include "globals.h" +#include "utils.h" namespace art { @@ -45,14 +46,64 @@ class MemoryRegion FINAL : public ValueObject { uint8_t* start() const { return reinterpret_cast<uint8_t*>(pointer_); } uint8_t* end() const { return start() + size_; } + // Load value of type `T` at `offset`. The memory address corresponding + // to `offset` should be word-aligned. template<typename T> T Load(uintptr_t offset) const { + // TODO: DCHECK that the address is word-aligned. return *ComputeInternalPointer<T>(offset); } + // Store `value` (of type `T`) at `offset`. The memory address + // corresponding to `offset` should be word-aligned. template<typename T> void Store(uintptr_t offset, T value) const { + // TODO: DCHECK that the address is word-aligned. *ComputeInternalPointer<T>(offset) = value; } + // TODO: Local hack to prevent name clashes between two conflicting + // implementations of bit_cast: + // - art::bit_cast<Destination, Source> runtime/base/casts.h, and + // - art::bit_cast<Source, Destination> from runtime/utils.h. + // Remove this when these routines have been merged. + template<typename Source, typename Destination> + static Destination local_bit_cast(Source in) { + static_assert(sizeof(Source) <= sizeof(Destination), + "Size of Source not <= size of Destination"); + union { + Source u; + Destination v; + } tmp; + tmp.u = in; + return tmp.v; + } + + // Load value of type `T` at `offset`. The memory address corresponding + // to `offset` does not need to be word-aligned. + template<typename T> T LoadUnaligned(uintptr_t offset) const { + // Equivalent unsigned integer type corresponding to T. + typedef typename UnsignedIntegerType<sizeof(T)>::type U; + U equivalent_unsigned_integer_value = 0; + // Read the value byte by byte in a little-endian fashion. + for (size_t i = 0; i < sizeof(U); ++i) { + equivalent_unsigned_integer_value += + *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte); + } + return local_bit_cast<U, T>(equivalent_unsigned_integer_value); + } + + // Store `value` (of type `T`) at `offset`. The memory address + // corresponding to `offset` does not need to be word-aligned. + template<typename T> void StoreUnaligned(uintptr_t offset, T value) const { + // Equivalent unsigned integer type corresponding to T. + typedef typename UnsignedIntegerType<sizeof(T)>::type U; + U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value); + // Write the value byte by byte in a little-endian fashion. 
+ for (size_t i = 0; i < sizeof(U); ++i) { + *ComputeInternalPointer<uint8_t>(offset + i) = + (equivalent_unsigned_integer_value >> (i * kBitsPerByte)) & 0xFF; + } + } + template<typename T> T* PointerTo(uintptr_t offset) const { return ComputeInternalPointer<T>(offset); } diff --git a/runtime/memory_region_test.cc b/runtime/memory_region_test.cc new file mode 100644 index 0000000000..72e03a485a --- /dev/null +++ b/runtime/memory_region_test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "memory_region.h" + +#include "gtest/gtest.h" + +namespace art { + +TEST(MemoryRegion, LoadUnaligned) { + const size_t n = 8; + uint8_t data[n] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + MemoryRegion region(&data, n); + + ASSERT_EQ(0, region.LoadUnaligned<char>(0)); + ASSERT_EQ(1u + + (2u << kBitsPerByte) + + (3u << 2 * kBitsPerByte) + + (4u << 3 * kBitsPerByte), + region.LoadUnaligned<uint32_t>(1)); + ASSERT_EQ(5 + (6 << kBitsPerByte), region.LoadUnaligned<int16_t>(5)); + ASSERT_EQ(7u, region.LoadUnaligned<unsigned char>(7)); +} + +TEST(MemoryRegion, StoreUnaligned) { + const size_t n = 8; + uint8_t data[n] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + MemoryRegion region(&data, n); + + region.StoreUnaligned<unsigned char>(0u, 7); + region.StoreUnaligned<int16_t>(1, 6 + (5 << kBitsPerByte)); + region.StoreUnaligned<uint32_t>(3, + 4u + + (3u << kBitsPerByte) + + (2u << 2 * kBitsPerByte) + + (1u << 3 * kBitsPerByte)); + region.StoreUnaligned<char>(7, 0); + + uint8_t expected[n] = { 7, 6, 5, 4, 3, 2, 1, 0 }; + for (size_t i = 0; i < n; ++i) { + ASSERT_EQ(expected[i], data[i]); + } +} + +} // namespace art diff --git a/runtime/primitive.h b/runtime/primitive.h index 9dda144755..2d6b6b30c7 100644 --- a/runtime/primitive.h +++ b/runtime/primitive.h @@ -165,6 +165,10 @@ class Primitive { } } + static bool IsIntOrLongType(Type type) { + return type == kPrimInt || type == kPrimLong; + } + static bool Is64BitType(Type type) { return type == kPrimLong || type == kPrimDouble; } diff --git a/runtime/stack.cc b/runtime/stack.cc index 48becf688f..e420c57346 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -204,29 +204,32 @@ bool StackVisitor::GetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, DCHECK(code_item != nullptr) << PrettyMethod(m); // Can't be NULL or how would we compile // its instructions? 
DCHECK_LT(vreg, code_item->registers_size_); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, - code_item->registers_size_); - DexRegisterMap::LocationKind location_kind = dex_register_map.GetLocationKind(vreg); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, code_item->registers_size_); + DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg); switch (location_kind) { - case DexRegisterMap::kInStack: { + case DexRegisterLocation::Kind::kInStack: { const int32_t offset = dex_register_map.GetStackOffsetInBytes(vreg); const uint8_t* addr = reinterpret_cast<const uint8_t*>(cur_quick_frame_) + offset; *val = *reinterpret_cast<const uint32_t*>(addr); return true; } - case DexRegisterMap::kInRegister: - case DexRegisterMap::kInFpuRegister: { + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: { uint32_t reg = dex_register_map.GetMachineRegister(vreg); return GetRegisterIfAccessible(reg, kind, val); } - case DexRegisterMap::kConstant: + case DexRegisterLocation::Kind::kConstant: *val = dex_register_map.GetConstant(vreg); return true; - case DexRegisterMap::kNone: + case DexRegisterLocation::Kind::kNone: return false; + default: + LOG(FATAL) + << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(dex_register_map.GetLocationInternalKind(vreg)); + UNREACHABLE(); } - UNREACHABLE(); - return false; } bool StackVisitor::GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const { @@ -386,29 +389,29 @@ bool StackVisitor::SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, DCHECK(code_item != nullptr) << PrettyMethod(m); // Can't be NULL or how would we compile // its instructions? DCHECK_LT(vreg, code_item->registers_size_); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, - code_item->registers_size_); - DexRegisterMap::LocationKind location_kind = dex_register_map.GetLocationKind(vreg); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, code_item->registers_size_); + DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg); uint32_t dex_pc = m->ToDexPc(cur_quick_frame_pc_, false); switch (location_kind) { - case DexRegisterMap::kInStack: { + case DexRegisterLocation::Kind::kInStack: { const int32_t offset = dex_register_map.GetStackOffsetInBytes(vreg); uint8_t* addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + offset; *reinterpret_cast<uint32_t*>(addr) = new_value; return true; } - case DexRegisterMap::kInRegister: - case DexRegisterMap::kInFpuRegister: { + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: { uint32_t reg = dex_register_map.GetMachineRegister(vreg); return SetRegisterIfAccessible(reg, new_value, kind); } - case DexRegisterMap::kConstant: + case DexRegisterLocation::Kind::kConstant: LOG(ERROR) << StringPrintf("Cannot change value of DEX register v%u used as a constant at " "DEX pc 0x%x (native pc 0x%x) of method %s", vreg, dex_pc, native_pc_offset, PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str()); return false; - case DexRegisterMap::kNone: + case DexRegisterLocation::Kind::kNone: LOG(ERROR) << StringPrintf("No location for DEX register v%u at DEX pc 0x%x " "(native pc 0x%x) of method %s", vreg, dex_pc, native_pc_offset, diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 6d996722b4..c98162306c 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -23,6 +23,11 @@ 
namespace art { +// Size of a frame slot, in bytes. This constant is a signed value, +// to please the compiler in arithmetic operations involving int32_t +// (signed) values. +static ssize_t constexpr kFrameSlotSize = 4; + /** * Classes in the following file are wrapper on stack map information backed * by a MemoryRegion. As such they read and write to the region, they don't have @@ -58,6 +63,8 @@ class InlineInfo { } private: + // TODO: Instead of plain types such as "uint8_t", introduce + // typedefs (and document the memory layout of InlineInfo). static constexpr int kDepthOffset = 0; static constexpr int kFixedSize = kDepthOffset + sizeof(uint8_t); @@ -68,82 +75,327 @@ class InlineInfo { friend class StackMapStream; }; +// Dex register location container used by DexRegisterMap and StackMapStream. +class DexRegisterLocation { + public: + /* + * The location kind used to populate the Dex register information in a + * StackMapStream can either be: + * - kNone: the register has no location yet, meaning it has not been set; + * - kConstant: value holds the constant; + * - kStack: value holds the stack offset; + * - kRegister: value holds the physical register number; + * - kFpuRegister: value holds the physical register number. + * + * In addition, DexRegisterMap also uses these values: + * - kInStackLargeOffset: value holds a "large" stack offset (greater than + * 128 bytes); + * - kConstantLargeValue: value holds a "large" constant (lower than or + * equal to -16, or greater than 16). + */ + enum class Kind : uint8_t { + // Short location kinds, for entries fitting on one byte (3 bits + // for the kind, 5 bits for the value) in a DexRegisterMap. + kNone = 0, // 0b000 + kInStack = 1, // 0b001 + kInRegister = 2, // 0b010 + kInFpuRegister = 3, // 0b011 + kConstant = 4, // 0b100 + + // Large location kinds, requiring a 5-byte encoding (1 byte for the + // kind, 4 bytes for the value). + + // Stack location at a large offset, meaning that the offset value + // divided by the stack frame slot size (4 bytes) cannot fit on a + // 5-bit unsigned integer (i.e., this offset value is greater than + // or equal to 2^5 * 4 = 128 bytes). + kInStackLargeOffset = 5, // 0b101 + + // Large constant, that cannot fit on a 5-bit signed integer (i.e., + // lower than -2^(5-1) = -16, or greater than or equal to + // 2^(5-1) - 1 = 15). + kConstantLargeValue = 6, // 0b110 + + kLastLocationKind = kConstantLargeValue + }; + + static_assert( + sizeof(Kind) == 1u, + "art::DexRegisterLocation::Kind has a size different from one byte."); + + static const char* PrettyDescriptor(Kind kind) { + switch (kind) { + case Kind::kNone: + return "none"; + case Kind::kInStack: + return "in stack"; + case Kind::kInRegister: + return "in register"; + case Kind::kInFpuRegister: + return "in fpu register"; + case Kind::kConstant: + return "as constant"; + case Kind::kInStackLargeOffset: + return "in stack (large offset)"; + case Kind::kConstantLargeValue: + return "as constant (large value)"; + default: + UNREACHABLE(); + } + } + + static bool IsShortLocationKind(Kind kind) { + switch (kind) { + case Kind::kNone: + case Kind::kInStack: + case Kind::kInRegister: + case Kind::kInFpuRegister: + case Kind::kConstant: + return true; + + case Kind::kInStackLargeOffset: + case Kind::kConstantLargeValue: + return false; + + default: + UNREACHABLE(); + } + } + + // Convert `kind` to a "surface" kind, i.e. one that doesn't include + // any value with a "large" qualifier. + // TODO: Introduce another enum type for the surface kind? 
+ static Kind ConvertToSurfaceKind(Kind kind) { + switch (kind) { + case Kind::kNone: + case Kind::kInStack: + case Kind::kInRegister: + case Kind::kInFpuRegister: + case Kind::kConstant: + return kind; + + case Kind::kInStackLargeOffset: + return Kind::kInStack; + + case Kind::kConstantLargeValue: + return Kind::kConstant; + + default: + UNREACHABLE(); + } + } + + DexRegisterLocation(Kind kind, int32_t value) + : kind_(kind), value_(value) {} + + // Get the "surface" kind of the location, i.e., the one that doesn't + // include any value with a "large" qualifier. + Kind GetKind() const { + return ConvertToSurfaceKind(kind_); + } + + // Get the value of the location. + int32_t GetValue() const { return value_; } + + // Get the actual kind of the location. + Kind GetInternalKind() const { return kind_; } + + private: + Kind kind_; + int32_t value_; +}; + /** * Information on dex register values for a specific PC. The information is * of the form: * [location_kind, register_value]+. - * - * The location_kind for a Dex register can either be: - * - kConstant: register_value holds the constant, - * - kStack: register_value holds the stack offset, - * - kRegister: register_value holds the physical register number. - * - kFpuRegister: register_value holds the physical register number. - * - kNone: the register has no location yet, meaning it has not been set. + * either on 1 or 5 bytes (see art::DexRegisterLocation::Kind). */ class DexRegisterMap { public: explicit DexRegisterMap(MemoryRegion region) : region_(region) {} - enum LocationKind { - kNone, - kInStack, - kInRegister, - kInFpuRegister, - kConstant - }; + // Short (compressed) location, fitting on one byte. + typedef uint8_t ShortLocation; + + void SetRegisterInfo(size_t offset, const DexRegisterLocation& dex_register_location) { + DexRegisterLocation::Kind kind = ComputeCompressedKind(dex_register_location); + int32_t value = dex_register_location.GetValue(); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Compress the kind and the value as a single byte. + if (kind == DexRegisterLocation::Kind::kInStack) { + // Instead of storing stack offsets expressed in bytes for + // short stack locations, store slot offsets. A stack offset + // is a multiple of 4 (kFrameSlotSize). This means that by + // dividing it by 4, we can fit values from the [0, 128) + // interval in a short stack location, and not just values + // from the [0, 32) interval. + DCHECK_EQ(value % kFrameSlotSize, 0); + value /= kFrameSlotSize; + } + DCHECK(IsUint<kValueBits>(value)) << value; + region_.StoreUnaligned<ShortLocation>(offset, MakeShortLocation(kind, value)); + } else { + // Large location. Write the location on one byte and the value + // on 4 bytes. + DCHECK(!IsUint<kValueBits>(value)) << value; + if (kind == DexRegisterLocation::Kind::kInStackLargeOffset) { + // Also divide large stack offsets by 4 for the sake of consistency. + DCHECK_EQ(value % kFrameSlotSize, 0); + value /= kFrameSlotSize; + } + // Data can be unaligned as the written Dex register locations can + // either be 1-byte or 5-byte wide. Use + // art::MemoryRegion::StoreUnaligned instead of + // art::MemoryRegion::Store to prevent unligned word accesses on ARM. 
+ region_.StoreUnaligned<DexRegisterLocation::Kind>(offset, kind); + region_.StoreUnaligned<int32_t>(offset + sizeof(DexRegisterLocation::Kind), value); + } + } - static const char* PrettyDescriptor(LocationKind kind) { - switch (kind) { - case kNone: - return "none"; - case kInStack: - return "in stack"; - case kInRegister: - return "in register"; - case kInFpuRegister: - return "in fpu register"; - case kConstant: - return "as constant"; + // Find the offset of the Dex register location number `dex_register_index`. + size_t FindLocationOffset(uint16_t dex_register_index) const { + size_t offset = kFixedSize; + // Skip the first `dex_register_index - 1` entries. + for (uint16_t i = 0; i < dex_register_index; ++i) { + // Read the first next byte and inspect its first 3 bits to decide + // whether it is a short or a large location. + DexRegisterLocation::Kind kind = ExtractKindAtOffset(offset); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Skip the current byte. + offset += SingleShortEntrySize(); + } else { + // Large location. Skip the 5 next bytes. + offset += SingleLargeEntrySize(); + } } - UNREACHABLE(); - return nullptr; + return offset; } - LocationKind GetLocationKind(uint16_t register_index) const { - return region_.Load<LocationKind>( - kFixedSize + register_index * SingleEntrySize()); + // Get the surface kind. + DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_index) const { + return DexRegisterLocation::ConvertToSurfaceKind(GetLocationInternalKind(dex_register_index)); } - void SetRegisterInfo(uint16_t register_index, LocationKind kind, int32_t value) { - size_t entry = kFixedSize + register_index * SingleEntrySize(); - region_.Store<LocationKind>(entry, kind); - region_.Store<int32_t>(entry + sizeof(LocationKind), value); + // Get the internal kind. + DexRegisterLocation::Kind GetLocationInternalKind(uint16_t dex_register_index) const { + size_t offset = FindLocationOffset(dex_register_index); + return ExtractKindAtOffset(offset); } - int32_t GetValue(uint16_t register_index) const { - return region_.Load<int32_t>( - kFixedSize + sizeof(LocationKind) + register_index * SingleEntrySize()); + // TODO: Rename as GetDexRegisterLocation? + DexRegisterLocation GetLocationKindAndValue(uint16_t dex_register_index) const { + size_t offset = FindLocationOffset(dex_register_index); + // Read the first byte and inspect its first 3 bits to get the location. + ShortLocation first_byte = region_.LoadUnaligned<ShortLocation>(offset); + DexRegisterLocation::Kind kind = ExtractKindFromShortLocation(first_byte); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Extract the value from the remaining 5 bits. + int32_t value = ExtractValueFromShortLocation(first_byte); + if (kind == DexRegisterLocation::Kind::kInStack) { + // Convert the stack slot (short) offset to a byte offset value. + value *= kFrameSlotSize; + } + return DexRegisterLocation(kind, value); + } else { + // Large location. Read the four next bytes to get the value. + int32_t value = region_.LoadUnaligned<int32_t>(offset + sizeof(DexRegisterLocation::Kind)); + if (kind == DexRegisterLocation::Kind::kInStackLargeOffset) { + // Convert the stack slot (large) offset to a byte offset value. + value *= kFrameSlotSize; + } + return DexRegisterLocation(kind, value); + } } - int32_t GetStackOffsetInBytes(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kInStack); - // We currently encode the offset in bytes. 
- return GetValue(register_index); + int32_t GetStackOffsetInBytes(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetKind() == DexRegisterLocation::Kind::kInStack); + // GetLocationKindAndValue returns the offset in bytes. + return location.GetValue(); } - int32_t GetConstant(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kConstant); - return GetValue(register_index); + int32_t GetConstant(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetKind() == DexRegisterLocation::Kind::kConstant); + return location.GetValue(); } - int32_t GetMachineRegister(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kInRegister - || GetLocationKind(register_index) == kInFpuRegister); - return GetValue(register_index); + int32_t GetMachineRegister(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetInternalKind() == DexRegisterLocation::Kind::kInRegister + || location.GetInternalKind() == DexRegisterLocation::Kind::kInFpuRegister) + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); + return location.GetValue(); } - static size_t SingleEntrySize() { - return sizeof(LocationKind) + sizeof(int32_t); + // Compute the compressed kind of `location`. + static DexRegisterLocation::Kind ComputeCompressedKind(const DexRegisterLocation& location) { + switch (location.GetInternalKind()) { + case DexRegisterLocation::Kind::kNone: + DCHECK_EQ(location.GetValue(), 0); + return DexRegisterLocation::Kind::kNone; + + case DexRegisterLocation::Kind::kInRegister: + DCHECK_GE(location.GetValue(), 0); + DCHECK_LT(location.GetValue(), 1 << DexRegisterMap::kValueBits); + return DexRegisterLocation::Kind::kInRegister; + + case DexRegisterLocation::Kind::kInFpuRegister: + DCHECK_GE(location.GetValue(), 0); + DCHECK_LT(location.GetValue(), 1 << DexRegisterMap::kValueBits); + return DexRegisterLocation::Kind::kInFpuRegister; + + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + return IsUint<DexRegisterMap::kValueBits>(location.GetValue() / kFrameSlotSize) + ? DexRegisterLocation::Kind::kInStack + : DexRegisterLocation::Kind::kInStackLargeOffset; + + case DexRegisterLocation::Kind::kConstant: + return IsUint<DexRegisterMap::kValueBits>(location.GetValue()) + ? DexRegisterLocation::Kind::kConstant + : DexRegisterLocation::Kind::kConstantLargeValue; + + default: + LOG(FATAL) << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); + UNREACHABLE(); + } + } + + // Can `location` be turned into a short location? + static bool CanBeEncodedAsShortLocation(const DexRegisterLocation& location) { + switch (location.GetInternalKind()) { + case DexRegisterLocation::Kind::kNone: + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: + return true; + + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + return IsUint<kValueBits>(location.GetValue() / kFrameSlotSize); + + case DexRegisterLocation::Kind::kConstant: + return IsUint<kValueBits>(location.GetValue()); + + default: + UNREACHABLE(); + } + } + + static size_t EntrySize(const DexRegisterLocation& location) { + return CanBeEncodedAsShortLocation(location) + ? 
DexRegisterMap::SingleShortEntrySize() + : DexRegisterMap::SingleLargeEntrySize(); + } + + static size_t SingleShortEntrySize() { + return sizeof(ShortLocation); + } + + static size_t SingleLargeEntrySize() { + return sizeof(DexRegisterLocation::Kind) + sizeof(int32_t); } size_t Size() const { @@ -153,7 +405,43 @@ class DexRegisterMap { static constexpr int kFixedSize = 0; private: + // Width of the kind "field" in a short location, in bits. + static constexpr size_t kKindBits = 3; + // Width of the value "field" in a short location, in bits. + static constexpr size_t kValueBits = 5; + + static constexpr uint8_t kKindMask = (1 << kKindBits) - 1; + static constexpr int32_t kValueMask = (1 << kValueBits) - 1; + static constexpr size_t kKindOffset = 0; + static constexpr size_t kValueOffset = kKindBits; + + static ShortLocation MakeShortLocation(DexRegisterLocation::Kind kind, int32_t value) { + DCHECK(IsUint<kKindBits>(static_cast<uint8_t>(kind))) << static_cast<uint8_t>(kind); + DCHECK(IsUint<kValueBits>(value)) << value; + return (static_cast<uint8_t>(kind) & kKindMask) << kKindOffset + | (value & kValueMask) << kValueOffset; + } + + static DexRegisterLocation::Kind ExtractKindFromShortLocation(ShortLocation location) { + uint8_t kind = (location >> kKindOffset) & kKindMask; + DCHECK_LE(kind, static_cast<uint8_t>(DexRegisterLocation::Kind::kLastLocationKind)); + return static_cast<DexRegisterLocation::Kind>(kind); + } + + static int32_t ExtractValueFromShortLocation(ShortLocation location) { + return (location >> kValueOffset) & kValueMask; + } + + // Extract a location kind from the byte at position `offset`. + DexRegisterLocation::Kind ExtractKindAtOffset(size_t offset) const { + ShortLocation first_byte = region_.LoadUnaligned<ShortLocation>(offset); + return ExtractKindFromShortLocation(first_byte); + } + MemoryRegion region_; + + friend class CodeInfo; + friend class StackMapStream; }; /** @@ -187,7 +475,7 @@ class StackMap { } void SetNativePcOffset(uint32_t native_pc_offset) { - return region_.Store<uint32_t>(kNativePcOffsetOffset, native_pc_offset); + region_.Store<uint32_t>(kNativePcOffsetOffset, native_pc_offset); } uint32_t GetDexRegisterMapOffset() const { @@ -195,7 +483,7 @@ class StackMap { } void SetDexRegisterMapOffset(uint32_t offset) { - return region_.Store<uint32_t>(kDexRegisterMapOffsetOffset, offset); + region_.Store<uint32_t>(kDexRegisterMapOffsetOffset, offset); } uint32_t GetInlineDescriptorOffset() const { @@ -203,7 +491,7 @@ class StackMap { } void SetInlineDescriptorOffset(uint32_t offset) { - return region_.Store<uint32_t>(kInlineDescriptorOffsetOffset, offset); + region_.Store<uint32_t>(kInlineDescriptorOffsetOffset, offset); } uint32_t GetRegisterMask() const { @@ -238,9 +526,9 @@ class StackMap { && region_.size() == other.region_.size(); } - static size_t ComputeAlignedStackMapSize(size_t stack_mask_size) { + static size_t ComputeAlignedStackMapSize(size_t stack_map_size) { // On ARM, the stack maps must be 4-byte aligned. - return RoundUp(StackMap::kFixedSize + stack_mask_size, 4); + return RoundUp(StackMap::kFixedSize + stack_map_size, 4); } // Special (invalid) offset for the DexRegisterMapOffset field meaning @@ -252,6 +540,8 @@ class StackMap { static constexpr uint32_t kNoInlineInfo = -1; private: + // TODO: Instead of plain types such as "uint32_t", introduce + // typedefs (and document the memory layout of StackMap). 
   static constexpr int kDexPcOffset = 0;
   static constexpr int kNativePcOffsetOffset = kDexPcOffset + sizeof(uint32_t);
   static constexpr int kDexRegisterMapOffsetOffset = kNativePcOffsetOffset + sizeof(uint32_t);
@@ -317,11 +607,15 @@ class CodeInfo {
     return StackMap::ComputeAlignedStackMapSize(GetStackMaskSize());
   }
 
+  uint32_t GetStackMapsOffset() const {
+    return kFixedSize;
+  }
+
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) const {
     DCHECK(stack_map.HasDexRegisterMap());
     uint32_t offset = stack_map.GetDexRegisterMapOffset();
-    return DexRegisterMap(region_.Subregion(offset,
-        DexRegisterMap::kFixedSize + number_of_dex_registers * DexRegisterMap::SingleEntrySize()));
+    size_t size = ComputeDexRegisterMapSize(offset, number_of_dex_registers);
+    return DexRegisterMap(region_.Subregion(offset, size));
   }
 
   InlineInfo GetInlineInfoOf(StackMap stack_map) const {
@@ -356,6 +650,8 @@ class CodeInfo {
   }
 
  private:
+  // TODO: Instead of plain types such as "uint32_t", introduce
+  // typedefs (and document the memory layout of CodeInfo).
   static constexpr int kOverallSizeOffset = 0;
   static constexpr int kNumberOfStackMapsOffset = kOverallSizeOffset + sizeof(uint32_t);
   static constexpr int kStackMaskSizeOffset = kNumberOfStackMapsOffset + sizeof(uint32_t);
@@ -367,6 +663,33 @@ class CodeInfo {
         : region_.Subregion(kFixedSize, StackMapSize() * GetNumberOfStackMaps());
   }
 
+  // Compute the size of a Dex register map starting at offset `origin` in
+  // `region_` and containing `number_of_dex_registers` locations.
+  size_t ComputeDexRegisterMapSize(uint32_t origin, uint32_t number_of_dex_registers) const {
+    // TODO: Ideally, we would like to use art::DexRegisterMap::Size or
+    // art::DexRegisterMap::FindLocationOffset, but the DexRegisterMap is not
+    // yet built. Try to factor common code.
+    size_t offset = origin + DexRegisterMap::kFixedSize;
+    // Walk all `number_of_dex_registers` entries and add up their sizes.
+    for (uint16_t i = 0; i < number_of_dex_registers; ++i) {
+      // Read the first byte of the entry and inspect its low 3 bits to decide
+      // whether it is a short or a large location.
+      DexRegisterMap::ShortLocation first_byte =
+          region_.LoadUnaligned<DexRegisterMap::ShortLocation>(offset);
+      DexRegisterLocation::Kind kind =
+          DexRegisterMap::ExtractKindFromShortLocation(first_byte);
+      if (DexRegisterLocation::IsShortLocationKind(kind)) {
+        // Short location. Skip the current byte.
+        offset += DexRegisterMap::SingleShortEntrySize();
+      } else {
+        // Large location. Skip the next 5 bytes.
+        offset += DexRegisterMap::SingleLargeEntrySize();
+      }
    }
+    size_t size = offset - origin;
+    return size;
+  }
+
   MemoryRegion region_;
   friend class StackMapStream;
 };
diff --git a/runtime/utils.h b/runtime/utils.h
index d294f4b1a1..cd04c3ff2c 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -173,6 +173,24 @@ static inline uint32_t High32Bits(uint64_t value) {
   return static_cast<uint32_t>(value >> 32);
 }
 
+// Traits class providing an unsigned integer type of (byte) size `n`.
+template <size_t n>
+struct UnsignedIntegerType {
+  // No defined `type`.
+};
+
+template <>
+struct UnsignedIntegerType<1> { typedef uint8_t type; };
+
+template <>
+struct UnsignedIntegerType<2> { typedef uint16_t type; };
+
+template <>
+struct UnsignedIntegerType<4> { typedef uint32_t type; };
+
+template <>
+struct UnsignedIntegerType<8> { typedef uint64_t type; };
+
 // Type identity.
template <typename T> struct TypeIdentity { @@ -271,6 +289,12 @@ static constexpr int CTZ(T x) { } template<typename T> +static inline int WhichPowerOf2(T x) { + DCHECK((x != 0) && IsPowerOfTwo(x)); + return CTZ(x); +} + +template<typename T> static constexpr int POPCOUNT(T x) { return (sizeof(T) == sizeof(uint32_t)) ? __builtin_popcount(x) diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java index de2c5c7473..6b21fed66c 100644 --- a/test/442-checker-constant-folding/src/Main.java +++ b/test/442-checker-constant-folding/src/Main.java @@ -16,6 +16,18 @@ public class Main { + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + /** * Tiny three-register program exercising int constant folding * on negation. @@ -219,41 +231,203 @@ public class Main { return c; } - public static void main(String[] args) { - if (IntNegation() != -42) { - throw new Error(); - } + /** + * Test optimizations of arithmetic identities yielding a constant result. + */ - if (IntAddition1() != 3) { - throw new Error(); - } + // CHECK-START: int Main.And0(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[And:i\d+]] And [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[And]] ] - if (IntAddition2() != 14) { - throw new Error(); - } + // CHECK-START: int Main.And0(int) constant_folding (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-NOT: And + // CHECK-DAG: Return [ [[Const0]] ] - if (IntSubtraction() != 4) { - throw new Error(); - } + public static int And0(int arg) { + return arg & 0; + } - if (LongAddition() != 3L) { - throw new Error(); - } + // CHECK-START: long Main.Mul0(long) constant_folding (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-DAG: [[Mul:j\d+]] Mul [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Mul]] ] - if (LongSubtraction() != 4L) { - throw new Error(); - } + // CHECK-START: long Main.Mul0(long) constant_folding (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-NOT: Mul + // CHECK-DAG: Return [ [[Const0]] ] - if (StaticCondition() != 5) { - throw new Error(); - } + public static long Mul0(long arg) { + return arg * 0; + } - if (JumpsAndConditionals(true) != 7) { - throw new Error(); - } + // CHECK-START: int Main.OrAllOnes(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[ConstF:i\d+]] IntConstant -1 + // CHECK-DAG: [[Or:i\d+]] Or [ [[Arg]] [[ConstF]] ] + // CHECK-DAG: Return [ [[Or]] ] - if (JumpsAndConditionals(false) != 3) { - throw new Error(); - } + // CHECK-START: int Main.OrAllOnes(int) constant_folding (after) + // CHECK-DAG: [[ConstF:i\d+]] IntConstant -1 + // CHECK-NOT: Or + // CHECK-DAG: Return [ [[ConstF]] ] + + public static int OrAllOnes(int arg) { + return arg | -1; + } + + // CHECK-START: long Main.Rem0(long) constant_folding (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-DAG: [[DivZeroCheck:j\d+]] DivZeroCheck [ [[Arg]] ] + // CHECK-DAG: [[Rem:j\d+]] Rem [ [[Const0]] 
[[DivZeroCheck]] ] + // CHECK-DAG: Return [ [[Rem]] ] + + // CHECK-START: long Main.Rem0(long) constant_folding (after) + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-NOT: Rem + // CHECK-DAG: Return [ [[Const0]] ] + + public static long Rem0(long arg) { + return 0 % arg; + } + + // CHECK-START: int Main.Rem1(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const1:i\d+]] IntConstant 1 + // CHECK-DAG: [[Rem:i\d+]] Rem [ [[Arg]] [[Const1]] ] + // CHECK-DAG: Return [ [[Rem]] ] + + // CHECK-START: int Main.Rem1(int) constant_folding (after) + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-NOT: Rem + // CHECK-DAG: Return [ [[Const0]] ] + + public static int Rem1(int arg) { + return arg % 1; + } + + // CHECK-START: long Main.RemN1(long) constant_folding (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[ConstN1:j\d+]] LongConstant -1 + // CHECK-DAG: [[DivZeroCheck:j\d+]] DivZeroCheck [ [[Arg]] ] + // CHECK-DAG: [[Rem:j\d+]] Rem [ [[Arg]] [[DivZeroCheck]] ] + // CHECK-DAG: Return [ [[Rem]] ] + + // CHECK-START: long Main.RemN1(long) constant_folding (after) + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-NOT: Rem + // CHECK-DAG: Return [ [[Const0]] ] + + public static long RemN1(long arg) { + return arg % -1; + } + + // CHECK-START: int Main.Shl0(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Shl:i\d+]] Shl [ [[Const0]] [[Arg]] ] + // CHECK-DAG: Return [ [[Shl]] ] + + // CHECK-START: int Main.Shl0(int) constant_folding (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-NOT: Shl + // CHECK-DAG: Return [ [[Const0]] ] + + public static int Shl0(int arg) { + return 0 << arg; + } + + // CHECK-START: long Main.Shr0(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-DAG: [[Shr:j\d+]] Shr [ [[Const0]] [[Arg]] ] + // CHECK-DAG: Return [ [[Shr]] ] + + // CHECK-START: long Main.Shr0(int) constant_folding (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-NOT: Shr + // CHECK-DAG: Return [ [[Const0]] ] + + public static long Shr0(int arg) { + return (long)0 >> arg; + } + + // CHECK-START: long Main.SubSameLong(long) constant_folding (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Arg]] [[Arg]] ] + // CHECK-DAG: Return [ [[Sub]] ] + + // CHECK-START: long Main.SubSameLong(long) constant_folding (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-NOT: Sub + // CHECK-DAG: Return [ [[Const0]] ] + + public static long SubSameLong(long arg) { + return arg - arg; + } + + // CHECK-START: int Main.UShr0(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[UShr:i\d+]] UShr [ [[Const0]] [[Arg]] ] + // CHECK-DAG: Return [ [[UShr]] ] + + // CHECK-START: int Main.UShr0(int) constant_folding (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-NOT: UShr + // CHECK-DAG: Return [ [[Const0]] ] + + public static int UShr0(int arg) { + return 0 >>> arg; + } + + // CHECK-START: int Main.XorSameInt(int) constant_folding (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Xor:i\d+]] Xor [ [[Arg]] 
[[Arg]] ] + // CHECK-DAG: Return [ [[Xor]] ] + + // CHECK-START: int Main.XorSameInt(int) constant_folding (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-NOT: Xor + // CHECK-DAG: Return [ [[Const0]] ] + + public static int XorSameInt(int arg) { + return arg ^ arg; + } + + public static void main(String[] args) { + assertIntEquals(IntNegation(), -42); + assertIntEquals(IntAddition1(), 3); + assertIntEquals(IntAddition2(), 14); + assertIntEquals(IntSubtraction(), 4); + assertLongEquals(LongAddition(), 3L); + assertLongEquals(LongSubtraction(), 4L); + assertIntEquals(StaticCondition(), 5); + assertIntEquals(JumpsAndConditionals(true), 7); + assertIntEquals(JumpsAndConditionals(false), 3); + int random = 123456; // Chosen randomly. + assertIntEquals(And0(random), 0); + assertLongEquals(Mul0(random), 0); + assertIntEquals(OrAllOnes(random), -1); + assertLongEquals(Rem0(random), 0); + assertIntEquals(Rem1(random), 0); + assertLongEquals(RemN1(random), 0); + assertIntEquals(Shl0(random), 0); + assertLongEquals(Shr0(random), 0); + assertLongEquals(SubSameLong(random), 0); + assertIntEquals(UShr0(random), 0); + assertIntEquals(XorSameInt(random), 0); } } diff --git a/test/458-checker-instruction-simplification/expected.txt b/test/458-checker-instruction-simplification/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/458-checker-instruction-simplification/expected.txt diff --git a/test/458-checker-instruction-simplification/info.txt b/test/458-checker-instruction-simplification/info.txt new file mode 100644 index 0000000000..09da84b925 --- /dev/null +++ b/test/458-checker-instruction-simplification/info.txt @@ -0,0 +1 @@ +Tests arithmetic identities optimizations in the optimizing compiler. diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java new file mode 100644 index 0000000000..ef6428ddd4 --- /dev/null +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -0,0 +1,300 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + /** + * Tiny programs exercising optimizations of arithmetic identities. 
+ */ + + // CHECK-START: long Main.Add0(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-DAG: [[Add:j\d+]] Add [ [[Const0]] [[Arg]] ] + // CHECK-DAG: Return [ [[Add]] ] + + // CHECK-START: long Main.Add0(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Add + // CHECK-DAG: Return [ [[Arg]] ] + + public static long Add0(long arg) { + return 0 + arg; + } + + // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[ConstF:i\d+]] IntConstant -1 + // CHECK-DAG: [[And:i\d+]] And [ [[Arg]] [[ConstF]] ] + // CHECK-DAG: Return [ [[And]] ] + + // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-NOT: And + // CHECK-DAG: Return [ [[Arg]] ] + + public static int AndAllOnes(int arg) { + return arg & -1; + } + + // CHECK-START: long Main.Div1(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const1:j\d+]] LongConstant 1 + // CHECK-DAG: [[Div:j\d+]] Div [ [[Arg]] [[Const1]] ] + // CHECK-DAG: Return [ [[Div]] ] + + // CHECK-START: long Main.Div1(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Div + // CHECK-DAG: Return [ [[Arg]] ] + + public static long Div1(long arg) { + return arg / 1; + } + + // CHECK-START: int Main.DivN1(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[ConstN1:i\d+]] IntConstant -1 + // CHECK-DAG: [[Div:i\d+]] Div [ [[Arg]] [[ConstN1]] ] + // CHECK-DAG: Return [ [[Div]] ] + + // CHECK-START: int Main.DivN1(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] + // CHECK-NOT: Div + // CHECK-DAG: Return [ [[Neg]] ] + + public static int DivN1(int arg) { + return arg / -1; + } + + // CHECK-START: long Main.Mul1(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const1:j\d+]] LongConstant 1 + // CHECK-DAG: [[Mul:j\d+]] Mul [ [[Arg]] [[Const1]] ] + // CHECK-DAG: Return [ [[Mul]] ] + + // CHECK-START: long Main.Mul1(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Mul + // CHECK-DAG: Return [ [[Arg]] ] + + public static long Mul1(long arg) { + return arg * 1; + } + + // CHECK-START: int Main.MulN1(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[ConstN1:i\d+]] IntConstant -1 + // CHECK-DAG: [[Mul:i\d+]] Mul [ [[Arg]] [[ConstN1]] ] + // CHECK-DAG: Return [ [[Mul]] ] + + // CHECK-START: int Main.MulN1(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] + // CHECK-NOT: Mul + // CHECK-DAG: Return [ [[Neg]] ] + + public static int MulN1(int arg) { + return arg * -1; + } + + // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const128:j\d+]] LongConstant 128 + // CHECK-DAG: [[Mul:j\d+]] Mul [ [[Arg]] [[Const128]] ] + // CHECK-DAG: Return [ [[Mul]] ] + + // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const7:i\d+]] IntConstant 7 + // CHECK-DAG: [[Shl:j\d+]] Shl [ [[Arg]] [[Const7]] ] + // CHECK-NOT: 
Mul + // CHECK-DAG: Return [ [[Shl]] ] + + public static long MulPowerOfTwo128(long arg) { + return arg * 128; + } + + // CHECK-START: int Main.Or0(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Or:i\d+]] Or [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Or]] ] + + // CHECK-START: int Main.Or0(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-NOT: Or + // CHECK-DAG: Return [ [[Arg]] ] + + public static int Or0(int arg) { + return arg | 0; + } + + // CHECK-START: long Main.OrSame(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Or:j\d+]] Or [ [[Arg]] [[Arg]] ] + // CHECK-DAG: Return [ [[Or]] ] + + // CHECK-START: long Main.OrSame(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Or + // CHECK-DAG: Return [ [[Arg]] ] + + public static long OrSame(long arg) { + return arg | arg; + } + + // CHECK-START: int Main.Shl0(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Shl:i\d+]] Shl [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Shl]] ] + + // CHECK-START: int Main.Shl0(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-NOT: Shl + // CHECK-DAG: Return [ [[Arg]] ] + + public static int Shl0(int arg) { + return arg << 0; + } + + // CHECK-START: long Main.Shr0(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Shr:j\d+]] Shr [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Shr]] ] + + // CHECK-START: long Main.Shr0(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Shr + // CHECK-DAG: Return [ [[Arg]] ] + + public static long Shr0(long arg) { + return arg >> 0; + } + + // CHECK-START: long Main.Sub0(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:j\d+]] LongConstant 0 + // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Sub]] ] + + // CHECK-START: long Main.Sub0(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: Sub + // CHECK-DAG: Return [ [[Arg]] ] + + public static long Sub0(long arg) { + return arg - 0; + } + + // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Const0]] [[Arg]] ] + // CHECK-DAG: Return [ [[Sub]] ] + + // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] + // CHECK-NOT: Sub + // CHECK-DAG: Return [ [[Neg]] ] + + public static int SubAliasNeg(int arg) { + return 0 - arg; + } + + // CHECK-START: long Main.UShr0(long) instruction_simplifier (before) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[UShr:j\d+]] UShr [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[UShr]] ] + + // CHECK-START: long Main.UShr0(long) instruction_simplifier (after) + // CHECK-DAG: [[Arg:j\d+]] ParameterValue + // CHECK-NOT: UShr + // CHECK-DAG: Return [ [[Arg]] ] + + public static long UShr0(long arg) { + return arg >>> 0; + } + + // CHECK-START: 
int Main.Xor0(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: [[Xor:i\d+]] Xor [ [[Arg]] [[Const0]] ] + // CHECK-DAG: Return [ [[Xor]] ] + + // CHECK-START: int Main.Xor0(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-NOT: Xor + // CHECK-DAG: Return [ [[Arg]] ] + + public static int Xor0(int arg) { + return arg ^ 0; + } + + // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[ConstF:i\d+]] IntConstant -1 + // CHECK-DAG: [[Xor:i\d+]] Xor [ [[Arg]] [[ConstF]] ] + // CHECK-DAG: Return [ [[Xor]] ] + + // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Not:i\d+]] Not [ [[Arg]] ] + // CHECK-NOT: Xor + // CHECK-DAG: Return [ [[Not]] ] + + public static int XorAllOnes(int arg) { + return arg ^ -1; + } + + public static void main(String[] args) { + int arg = 123456; + + assertLongEquals(Add0(arg), arg); + assertIntEquals(AndAllOnes(arg), arg); + assertLongEquals(Div1(arg), arg); + assertIntEquals(DivN1(arg), -arg); + assertLongEquals(Mul1(arg), arg); + assertIntEquals(MulN1(arg), -arg); + assertLongEquals(MulPowerOfTwo128(arg), (128 * arg)); + assertIntEquals(Or0(arg), arg); + assertLongEquals(OrSame(arg), arg); + assertIntEquals(Shl0(arg), arg); + assertLongEquals(Shr0(arg), arg); + assertLongEquals(Sub0(arg), arg); + assertIntEquals(SubAliasNeg(arg), -arg); + assertLongEquals(UShr0(arg), arg); + assertIntEquals(Xor0(arg), arg); + assertIntEquals(XorAllOnes(arg), ~arg); + } +} diff --git a/test/460-multiple-returns3/expected.txt b/test/460-multiple-returns3/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/460-multiple-returns3/expected.txt diff --git a/test/460-multiple-returns3/info.txt b/test/460-multiple-returns3/info.txt new file mode 100644 index 0000000000..cdd354bbdf --- /dev/null +++ b/test/460-multiple-returns3/info.txt @@ -0,0 +1,2 @@ +Tests inlining of a pattern not generated by DX: multiple +returns in a single method. diff --git a/test/460-multiple-returns3/smali/MultipleReturns.smali b/test/460-multiple-returns3/smali/MultipleReturns.smali new file mode 100644 index 0000000000..38569a7aa9 --- /dev/null +++ b/test/460-multiple-returns3/smali/MultipleReturns.smali @@ -0,0 +1,40 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
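# Note on the test below: v1 (1) never equals v0 (0), so both if-eq checks in
# $opt$CalleeReturnShort fall through, v0 is set to 4 and the first of the two
# return instructions is taken; Main.java further down expects that value.
# The interesting property is that the callee contains two return statements,
# a shape dx does not generate, so the optimizing compiler's inliner has to
# cope with more than one exit block when it inlines this method.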
+ +.class public LMultipleReturns; + +.super Ljava/lang/Object; + +.method public static caller()S + .registers 1 + invoke-static {}, LMultipleReturns;->$opt$CalleeReturnShort()S + move-result v0 + return v0 +.end method + +.method public static $opt$CalleeReturnShort()S + .registers 2 + const/4 v0, 0x0 + const/4 v1, 0x1 + if-eq v1, v0, :else + if-eq v1, v0, :else2 + const/4 v0, 0x4 + :else2 + return v0 + :else + if-eq v1, v0, :else3 + const/4 v1, 0x1 + :else3 + return v1 +.end method diff --git a/test/460-multiple-returns3/src/Main.java b/test/460-multiple-returns3/src/Main.java new file mode 100644 index 0000000000..fb8a1151d1 --- /dev/null +++ b/test/460-multiple-returns3/src/Main.java @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) throws Exception { + Class<?> c = Class.forName("MultipleReturns"); + Method m = c.getMethod("caller"); + short result = (Short) m.invoke(null); + if (result != 4) { + throw new Error("Expected 4, got " + result); + } + } +} |
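To get a concrete feel for the Dex register map encoding introduced in runtime/stack_map.h above, the following standalone sketch reproduces the idea outside ART. The Kind numbering, the little-endian byte order of the 32-bit value and all helper names are assumptions made for the illustration; only the 3-bit kind / 5-bit value split, the 4-byte frame slot size and the short-versus-large decision mirror the actual diff.

// stack_map_encoding_sketch.cc -- illustrative only, not ART code.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed numbering; the real values are defined by DexRegisterLocation::Kind.
enum class Kind : uint8_t {
  kNone, kInStack, kInRegister, kInFpuRegister, kConstant,
  kInStackLargeOffset, kConstantLargeValue
};

constexpr int kKindBits = 3;       // Kind lives in the low 3 bits of a short entry.
constexpr int kValueBits = 5;      // Value lives in the high 5 bits.
constexpr int kKindMask = (1 << kKindBits) - 1;
constexpr int kValueMask = (1 << kValueBits) - 1;
constexpr int kFrameSlotSize = 4;  // Stack offsets are multiples of 4 bytes.

struct Location {
  Kind kind;
  int32_t value;
};

// Append one entry: 1 byte when the (slot-converted) value fits in 5 bits,
// otherwise a "large" kind byte followed by 4 little-endian value bytes.
void Encode(Location loc, std::vector<uint8_t>* out) {
  Kind kind = loc.kind;
  int32_t value = loc.value;
  if (kind == Kind::kInStack) {
    assert(value % kFrameSlotSize == 0);
    value /= kFrameSlotSize;  // Store a slot index, not a byte offset.
  }
  if (value >= 0 && value <= kValueMask) {
    out->push_back(static_cast<uint8_t>(
        static_cast<uint8_t>(kind) | (value << kKindBits)));
  } else {
    if (kind == Kind::kInStack) kind = Kind::kInStackLargeOffset;
    if (kind == Kind::kConstant) kind = Kind::kConstantLargeValue;
    out->push_back(static_cast<uint8_t>(kind));
    for (int i = 0; i < 4; ++i) {
      out->push_back(static_cast<uint8_t>(static_cast<uint32_t>(value) >> (8 * i)));
    }
  }
}

// Decode the entry starting at *pos and advance *pos past it -- the same
// linear walk FindLocationOffset and ComputeDexRegisterMapSize perform.
Location Decode(const std::vector<uint8_t>& in, size_t* pos) {
  uint8_t first = in[*pos];
  Kind kind = static_cast<Kind>(first & kKindMask);
  if (kind != Kind::kInStackLargeOffset && kind != Kind::kConstantLargeValue) {
    *pos += 1;  // Short entry: one byte.
    int32_t value = (first >> kKindBits) & kValueMask;
    if (kind == Kind::kInStack) value *= kFrameSlotSize;
    return {kind, value};
  }
  uint32_t raw = 0;  // Large entry: kind byte plus four value bytes.
  for (int i = 0; i < 4; ++i) {
    raw |= static_cast<uint32_t>(in[*pos + 1 + i]) << (8 * i);
  }
  *pos += 5;
  int32_t value = static_cast<int32_t>(raw);
  if (kind == Kind::kInStackLargeOffset) {
    return {Kind::kInStack, value * kFrameSlotSize};
  }
  return {Kind::kConstant, value};
}

int main() {
  std::vector<uint8_t> map;
  Encode({Kind::kInStack, 116}, &map);   // Slot 29 fits in 5 bits -> 1 byte.
  Encode({Kind::kInStack, 1024}, &map);  // Slot 256 does not fit -> 5 bytes.
  Encode({Kind::kConstant, 3}, &map);    // Small constant -> 1 byte.
  assert(map.size() == 1u + 5u + 1u);
  for (size_t pos = 0; pos < map.size();) {
    Location loc = Decode(map, &pos);
    std::printf("kind=%d value=%d\n", static_cast<int>(loc.kind), loc.value);
  }
  return 0;
}

The three locations built in main() occupy 7 bytes in total, whereas the old fixed-width scheme (one LocationKind plus one int32_t per entry, typically 8 bytes each) would have used 24; that size reduction is the point of the change.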