70 files changed, 2706 insertions, 658 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 0afec2d5ef..dcde5abbca 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -188,6 +188,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/mod_union_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
+  runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
   runtime/gc/space/dlmalloc_space_base_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index f34b5edcc4..529143d93d 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -457,8 +457,10 @@ TEST_F(CmdlineParserTest, TestJitOptions) {
     EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT);
   }
   {
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * KB), "-Xjitcodecachesize:16K", M::JITCodeCacheCapacity);
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * MB), "-Xjitcodecachesize:16M", M::JITCodeCacheCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index a121f8b7a0..7b0e5af246 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -128,6 +128,7 @@ class CommonCompilerTest : public CommonRuntimeTest {
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) { \
     switch (GetInstructionSet()) { \
+      case kThumb2: \
       case kX86: \
       case kX86_64: \
         /* Instruction set has read barrier support. */ \
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2b60a51e22..5da72147b0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1104,7 +1104,11 @@ void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) {
     // access because the verifier was unable to?
     const DexFile* dex_file = cu_->dex_file;
     CompilerDriver* driver = cu_->compiler_driver;
-    if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx, *dex_file, type_idx)) {
+    bool finalizable;
+    if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx,
+                                                       *dex_file,
+                                                       type_idx,
+                                                       &finalizable)) {
       bool is_type_initialized;
       bool use_direct_type_ptr;
       uintptr_t direct_type_ptr;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index bf3a8658da..e42a73723b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1208,7 +1208,8 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const Dex

 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
                                                             const DexFile& dex_file,
-                                                            uint32_t type_idx) {
+                                                            uint32_t type_idx,
+                                                            bool* finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
       soa.Self(), dex_file, false);
@@ -1216,8 +1217,11 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
+    // Be conservative.
+ *finalizable = true; return false; // Unknown class needs access checks. } + *finalizable = resolved_class->IsFinalizable(); const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx); mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_); if (referrer_class == nullptr) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 5683b03a71..dae785b688 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -211,8 +211,11 @@ class CompilerDriver { REQUIRES(!Locks::mutator_lock_); // Are runtime access and instantiable checks necessary in the code? - bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file, - uint32_t type_idx) + // out_is_finalizable is set to whether the type is finalizable. + bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, + const DexFile& dex_file, + uint32_t type_idx, + bool* out_is_finalizable) REQUIRES(!Locks::mutator_lock_); bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx, diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 167c35d075..3257de1858 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1449,7 +1449,8 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, uint32_t* args, uint32_t register_index) { HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; HInstruction* object = new (arena_) HNewArray(length, @@ -1629,9 +1630,9 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, } } -bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { +bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const { return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index); + dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable); } void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table, @@ -2508,7 +2509,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + QuickEntrypointEnum entrypoint = can_throw ? kQuickAllocObjectWithAccessCheck : kQuickAllocObject; @@ -2517,6 +2520,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, entrypoint)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } @@ -2526,7 +2531,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_ARRAY: { uint16_t type_index = instruction.VRegC_22c(); HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? 
kQuickAllocArrayWithAccessCheck : kQuickAllocArray; current_block_->AddInstruction(new (arena_) HNewArray(length, diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 9eaa4b62c5..f857ef0e12 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -138,7 +138,10 @@ class HGraphBuilder : public ValueObject { HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const; void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); - bool NeedsAccessCheck(uint32_t type_index) const; + + // Returns whether the current method needs access check for the type. + // Output parameter finalizable is set to whether the type is finalizable. + bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const; template<typename T> void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 77d53fcd8f..0baa0e30dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -383,11 +383,11 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); switch (call->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: locations->AddTemp(visitor->GetMethodLocation()); - locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; default: locations->AddTemp(visitor->GetMethodLocation()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 655bbb8a8e..cb6bed08ec 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace arm { static bool ExpectedPairLayout(Location location) { @@ -286,15 +289,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -315,6 +309,8 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); @@ -322,6 +318,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -408,6 +405,221 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. 
+ Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::arm::Thumb2Assembler::Lsl and + // art::arm::Thumb2Assembler::AddConstant below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddConstant(index_reg, index_reg, offset_); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+    }
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode { + public: + ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM); +}; + #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> @@ -581,7 +793,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { @@ -820,7 +1032,7 @@ Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const { @@ -847,7 +1059,7 @@ Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type } case Primitive::kPrimVoid: - return Location(); + return Location::NoLocation(); } UNREACHABLE(); @@ -1762,29 +1974,39 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register hidden_reg = locations->GetTemp(1).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. - __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); + // Set the hidden argument. This is safe to do this here, as R12 + // won't be modified thereafter, before the `blx` (call) instruction. 
+ DCHECK_EQ(R12, hidden_reg); + __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); + // /* HeapReference<Class> */ temp = temp->klass_ __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); - uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value(); + uint32_t entry_point = + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -3407,6 +3629,9 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, Register out_lo, Register out_hi) { if (offset != 0) { + // Ensure `out_lo` is different from `addr`, so that loading + // `offset` into `out_lo` does not clutter `addr`. + DCHECK_NE(out_lo, addr); __ LoadImmediate(out_lo, offset); __ add(IP, addr, ShifterOperand(out_lo)); addr = IP; @@ -3594,14 +3819,26 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong); + // The output overlaps in case of volatile long: we don't want the + // code generated by GenerateWideAtomicLoad to overwrite the + // object's location. Likewise, in the case of an object field get + // with read barriers enabled, we do not want the load to overwrite + // the object's location, as we need it to emit the read barrier. 
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + object_field_get_with_read_barrier; if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -3667,7 +3904,8 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); @@ -3747,7 +3985,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -3891,20 +4129,31 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -3967,8 +4216,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4031,8 +4281,17 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4041,11 +4300,16 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(value_type)) { @@ -4053,20 +4317,20 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { } else { locations->SetInAt(2, Location::RequiresRegister()); } - if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); // Possibly used for read barrier too. 
} } void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4103,7 +4367,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); + Location value_loc = locations->InAt(2); + Register value = value_loc.AsRegister<Register>(); Register source = value; if (instruction->InputAt(2)->IsNullConstant()) { @@ -4117,6 +4382,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4129,7 +4396,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Label done; SlowPathCode* slow_path = nullptr; - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4149,23 +4416,63 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - __ LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // No need to poison/unpoison, we're comparing two poisoined references. - __ cmp(temp1, ShifterOperand(temp2)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Label do_put; - __ b(&do_put, EQ); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to poison/unpoison, we're comparing against null. - __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp1); + // // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); + // + // __ cmp(temp1, ShifterOperand(temp2)); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). 
So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ b(slow_path->GetEntryLabel()); } else { - __ b(slow_path->GetEntryLabel(), NE); + // /* HeapReference<Class> */ temp1 = array->klass_ + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. + __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } } } @@ -4189,7 +4496,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, source, IP, data_offset); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4618,7 +4925,8 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(R0)); + Location::RegisterLocation(R0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { @@ -4632,21 +4940,42 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } } else { DCHECK(cls->CanCallRuntime()); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, out, current_method, 
ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - // TODO: We will need a read barrier here. + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -4701,13 +5030,35 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -4750,41 +5101,45 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(R0)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4798,15 +5153,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ CompareAndBranchIfZero(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? 
obj - : out; - __ LoadFromOffset(kLoadWord, target, obj, class_offset); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ LoadFromOffset(kLoadWord, out, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4817,13 +5166,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -4834,14 +5193,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop, success; __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -4852,14 +5221,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. Label exact_check; __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -4870,11 +5249,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kArrayCheck: { __ cmp(out, ShifterOperand(cls)); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); @@ -4883,13 +5263,25 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ b(&done); } @@ -4915,57 +5307,61 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->WillCall() - ? Register(kNoRegister) - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); Label done; // Avoid null check if we know obj is not null. 
@@ -4973,76 +5369,159 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(obj, &done); } - if (locations->WillCall()) { - __ LoadFromOffset(kLoadWord, obj, obj, class_offset); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ cmp(temp, ShifterOperand(cls)); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ b(slow_path->GetEntryLabel(), NE); + __ b(type_check_slow_path->GetEntryLabel(), NE); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - Label loop; + Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ CompareAndBranchIfNonZero(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ cmp(temp, ShifterOperand(cls)); __ b(&loop, NE); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ CompareAndBranchIfNonZero(temp, &loop); - // Jump to the slow path to throw the exception. - __ b(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + Label check_non_primitive_component_type; __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); + __ CompareAndBranchIfZero(temp, &done); + // Same comment as above regarding `temp` and the slow path. 
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ b(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5216,6 +5695,82 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. 
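// Editorial sketch (simplified, not ART code): a compilable rendering of the Baker
// fast path described in the TODO inside GenerateReadBarrier() above. The lock-word
// bit, object layout and Mark() are illustrative placeholders, not ART's actual ones.
#include <atomic>
#include <cstdint>

struct Obj { uint32_t lock_word; Obj* field; };

static constexpr uint32_t kReadBarrierGrayBit = 0x10000000u;  // placeholder mask

static Obj* Mark(Obj* ref) { return ref; }  // stand-in for the marking slow path

static Obj* BakerReadField(Obj* obj) {
  bool is_gray = (obj->lock_word & kReadBarrierGrayBit) != 0;
  std::atomic_thread_fence(std::memory_order_acquire);  // the "lfence" in the TODO
  Obj* ref = obj->field;                                // the original load
  return is_gray ? Mark(ref) : ref;                     // only gray objects take the slow path
}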
+ GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { @@ -5273,7 +5828,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -5288,7 +5843,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -5299,10 +5854,11 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } - // temp = current_method->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset( - kArmPointerSize).Int32Value()); + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + __ LoadFromOffset(kLoadWord, + reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); // temp = temp[index_in_cache] uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index; __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); @@ -5346,10 +5902,17 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 32bfe0f0be..89de4f801d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -373,6 +373,51 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 70bf7357ee..2776b7d6c9 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -68,6 +68,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; +// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// table version generates 7 instructions and num_entries literals. Compare/jump sequence will +// generates less code/data with a small num_entries. 
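// Editorial sketch (not part of the patch): the break-even point behind the threshold
// below, using the instruction counts from the comment above and assuming 4 bytes per
// A64 instruction and per jump-table entry.
#include <cstdint>

constexpr uint32_t CompareJumpBytes(uint32_t n) { return (2 * n + 1) * 4; }  // ~2n+1 instructions
constexpr uint32_t JumpTableBytes(uint32_t n) { return 7 * 4 + n * 4; }      // 7 instructions + n literals

static_assert(CompareJumpBytes(5) < JumpTableBytes(5), "5 entries: compare/jump is smaller");
static_assert(CompareJumpBytes(6) == JumpTableBytes(6), "6 entries: roughly break-even");
static_assert(CompareJumpBytes(7) > JumpTableBytes(7), "7 entries: jump table is smaller");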
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -545,6 +549,28 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; +void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { + uint32_t num_entries = switch_instr_->GetNumEntries(); + DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + + // We are about to use the assembler to place literals directly. Make sure we have enough + // underlying code buffer and we have generated the jump table with right size. + CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t), + CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize); + + __ Bind(&table_start_); + const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + vixl::Label* target_label = codegen->GetLabelOf(successors[i]); + DCHECK(target_label->IsBound()); + ptrdiff_t jump_offset = target_label->location() - table_start_.location(); + DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min()); + DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max()); + Literal<int32_t> literal(jump_offset); + __ place(&literal); + } +} + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -587,6 +613,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, compiler_options, stats), block_labels_(nullptr), + jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -603,10 +630,16 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, AddAllocatedRegister(LocationFrom(lr)); } -#undef __ #define __ GetVIXLAssembler()-> +void CodeGeneratorARM64::EmitJumpTables() { + for (auto jump_table : jump_tables_) { + jump_table->EmitTable(this); + } +} + void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { + EmitJumpTables(); // Ensure we emit the literal pool. __ FinalizeCode(); @@ -2893,7 +2926,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
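// Editorial sketch (simplified, not ART code): EmitTable() above stores, for each case,
// the 32-bit byte offset from the table start to the case's target block; the dispatch
// sequence emitted in VisitPackedSwitch() below performs the lookup modeled here.
#include <cstdint>

static const void* JumpTableTarget(const uint8_t* table_base,   // address bound at table_start_
                                   const int32_t* offsets,      // the emitted int32 literals
                                   uint32_t index,              // case value minus lower bound
                                   uint32_t num_entries,
                                   const void* default_target) {
  if (index >= num_entries) {
    return default_target;             // out-of-range values go to the default block
  }
  return table_base + offsets[index];  // Adr/Ldr/Add(SXTW)/Br in the emitted code
}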
@@ -2927,7 +2960,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = XRegisterFrom(temp); Register method_reg; if (current_method.IsRegister()) { @@ -3848,26 +3881,73 @@ void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); Register value_reg = InputRegisterAt(switch_instr, 0); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); - } else { - __ Cmp(value_reg, vixl::Operand(case_value)); - __ B(eq, succ); + // Roughly set 16 as max average assemblies generated per HIR in a graph. + static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize; + // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to + // make sure we don't emit it if the target may run out of range. + // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR + // ranges and emit the tables only as required. + static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; + + if (num_entries < kPackedSwitchJumpTableThreshold || + // Current instruction id is an upper bound of the number of HIRs in the graph. + GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + vixl::Label* succ = codegen_->GetLabelOf(successors[i]); + if (case_value == 0) { + __ Cbz(value_reg, succ); + } else { + __ Cmp(value_reg, Operand(case_value)); + __ B(eq, succ); + } } - } - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ B(codegen_->GetLabelOf(default_block)); + } + } else { + JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr); + codegen_->AddJumpTable(jump_table); + + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + + // Below instructions should use at most one blocked register. Since there are two blocked + // registers, we are free to block one. + Register temp_w = temps.AcquireW(); + Register index; + // Remove the bias. + if (lower_bound != 0) { + index = temp_w; + __ Sub(index, value_reg, Operand(lower_bound)); + } else { + index = value_reg; + } + + // Jump to default block if index is out of the range. 
+ __ Cmp(index, Operand(num_entries)); + __ B(hs, codegen_->GetLabelOf(default_block)); + + // In current VIXL implementation, it won't require any blocked registers to encode the + // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the + // register pressure. + Register table_base = temps.AcquireX(); + // Load jump offset from the table. + __ Adr(table_base, jump_table->GetTableStartLabel()); + Register jump_offset = temp_w; + __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2)); + + // Jump to target block by branching to table_base(pc related) + offset. + Register target_address = table_base; + __ Add(target_address, table_base, Operand(jump_offset, SXTW)); + __ Br(target_address); } } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 44036621f8..881afcc123 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -81,6 +81,22 @@ class SlowPathCodeARM64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64); }; +class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> { + public: + explicit JumpTableARM64(HPackedSwitch* switch_instr) + : switch_instr_(switch_instr), table_start_() {} + + vixl::Label* GetTableStartLabel() { return &table_start_; } + + void EmitTable(CodeGeneratorARM64* codegen); + + private: + HPackedSwitch* const switch_instr_; + vixl::Label table_start_; + + DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); +}; + static const vixl::Register kRuntimeParameterCoreRegisters[] = { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = @@ -358,6 +374,10 @@ class CodeGeneratorARM64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<vixl::Label>(); } + void AddJumpTable(JumpTableARM64* jump_table) { + jump_tables_.push_back(jump_table); + } + void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. @@ -426,9 +446,12 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::Label* pc_insn_label; }; + void EmitJumpTables(); + // Labels for each block that will be compiled. vixl::Label* block_labels_; // Indexed by block id. 
vixl::Label frame_entry_label_; + ArenaVector<JumpTableARM64*> jump_tables_; LocationsBuilderARM64 location_builder_; InstructionCodeGeneratorARM64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 919ed2db78..801e203de5 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -3031,7 +3031,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -3043,7 +3043,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); Register method_reg; if (current_method.IsRegister()) { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5864660890..7b33075358 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -2822,9 +2822,9 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // sorted out. if (invoke->HasCurrentMethodInput()) { LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -2882,7 +2882,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress()); @@ -2894,7 +2894,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); GpuRegister reg = temp.AsRegister<GpuRegister>(); GpuRegister method_reg; if (current_method.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 999306c34b..a87e8ede04 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1908,7 +1908,7 @@ void 
LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any()); + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; } @@ -1917,7 +1917,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } @@ -1926,9 +1926,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // needs a register. We therefore do not require a register for it, and let // the code generation of the invoke handle it. LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -4032,7 +4032,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); if (!invoke->GetLocations()->Intrinsified()) { return location.AsRegister<Register>(); } @@ -4063,7 +4063,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); @@ -4084,7 +4084,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4088160b3f..dcc180804d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -718,7 +718,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo Address::Absolute(invoke->GetStringInitOffset(), true)); break; case 
HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress())); @@ -737,7 +737,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Bind(&pc_relative_dex_cache_patches_.back().label); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; CpuRegister reg = temp.AsRegister<CpuRegister>(); if (current_method.IsRegister()) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 2b7790184a..d166d0061f 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -397,6 +397,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << invoke->IsRecursive() << std::noboolalpha; StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); + if (invoke->IsStatic()) { + StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement(); + } } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { @@ -500,6 +503,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; + } else if (instruction->IsNullConstant()) { + // The NullConstant may be added to the graph during other passes that happen between + // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner + // doesn't run or doesn't inline anything, the NullConstant remains untyped. + // So we should check NullConstants for validity only after reference type propagation. + // + // Note: The infrastructure to properly type NullConstants everywhere is to complex to add + // for the benefits. 
+ StartAttributeStream("klass") << "not_set"; + DCHECK(!is_after_pass_ + || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) + << " Expected a valid rti after reference type propagation"; } else { DCHECK(!is_after_pass_) << "Expected a valid rti after reference type propagation"; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index b97dc1a511..9ad2dd1c8e 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -796,6 +796,34 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new(allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + } else if (IsPowerOfTwo(factor - 1)) { + // Transform code looking like + // MUL dst, src, (2^n + 1) + // into + // SHL tmp, src, n + // ADD dst, src, tmp + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1))); + HAdd* add = new (allocator) HAdd(type, input_other, shl); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } else if (IsPowerOfTwo(factor + 1)) { + // Transform code looking like + // MUL dst, src, (2^n - 1) + // into + // SHL tmp, src, n + // SUB dst, tmp, src + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1))); + HSub* sub = new (allocator) HSub(type, shl, input_other); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0a5acc3e64..d2017da221 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -44,7 +44,23 @@ using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitor bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ assembler-> @@ -662,20 +678,23 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
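// Editorial sketch (not part of the patch): scalar form of the new MUL simplifications
// above, e.g. x * 9 == (x << 3) + x and x * 7 == (x << 3) - x.
#include <cstdint>

constexpr int32_t MulByPow2Plus1(int32_t x, int n) { return (x << n) + x; }   // x * (2^n + 1)
constexpr int32_t MulByPow2Minus1(int32_t x, int n) { return (x << n) - x; }  // x * (2^n - 1)

static_assert(MulByPow2Plus1(5, 3) == 5 * 9, "SHL+ADD replaces MUL by 9");
static_assert(MulByPow2Minus1(5, 3) == 5 * 7, "SHL+SUB replaces MUL by 7");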
+ Location trg_loc = locations->Out(); if (type == Primitive::kPrimLong) { - Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); __ add(IP, base, ShifterOperand(offset)); if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); __ ldrexd(trg_lo, trg_hi, IP); } else { __ ldrd(trg_lo, Address(IP)); } } else { - Register trg = locations->Out().AsRegister<Register>(); + Register trg = trg_loc.AsRegister<Register>(); __ ldr(trg, Address(base, offset)); } @@ -684,14 +703,18 @@ static void GenUnsafeGet(HInvoke* invoke, } if (type == Primitive::kPrimNot) { - Register trg = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -936,6 +959,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Bind(&loop_head); __ ldrex(tmp_lo, tmp_ptr); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -964,7 +988,11 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. if (kPoisonHeapReferences) { return; } @@ -1400,6 +1428,10 @@ static void CheckPosition(ArmAssembler* assembler, } } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathARM +// slow path. 
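// Editorial sketch (simplified, not ART code): the reference path of GenUnsafeGet()
// above, with MaybeGenerateReadBarrier() modeled as a post-load fix-up. For Unsafe
// accesses the field offset is passed as the `index` argument with a base offset of 0.
#include <cstdint>

struct Ref;

static Ref* MaybeReadBarrier(Ref* ref) { return ref; }  // no-op unless read barriers are on

static Ref* UnsafeGetObject(void* base, int64_t offset) {
  Ref* ref = *reinterpret_cast<Ref**>(static_cast<char*>(base) + offset);  // ldr trg, [base, offset]
  return MaybeReadBarrier(ref);  // MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc)
}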
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index ff843ebb1e..3654159f83 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1391,6 +1391,108 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); + GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); + + Label loop; + Label end; + Label return_true; + Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst64(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqzc(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beqc(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bnec(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. + __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bnec(temp1, temp2, &return_false); + // Return true if both strings are empty. 
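// Editorial sketch (simplified, not ART code): the value-comparison strategy used by the
// MIPS64 String.equals intrinsic above. Four uint16_t chars are compared as one 64-bit
// word per iteration; this is only valid because the backing storage is assumed
// zero-padded to an 8-byte multiple, mirroring the kObjectAlignment argument in the code.
#include <cstdint>
#include <cstring>

static bool CharArraysEqual(const uint16_t* a, const uint16_t* b, int32_t char_count) {
  for (int32_t i = 0; i < char_count; i += 4) {  // Addiu temp1, temp1, -4 / Bgtzc
    uint64_t wa, wb;
    std::memcpy(&wa, a + i, sizeof(wa));  // Ld out,   TMP,   value_offset
    std::memcpy(&wb, b + i, sizeof(wb));  // Ld temp2, temp3, value_offset
    if (wa != wb) {
      return false;                       // Bnec out, temp2, &return_false
    }
  }
  return true;
}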
+ __ Beqzc(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 4 characters at a time. + DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. + __ Bind(&loop); + __ Ld(out, TMP, value_offset); + __ Ld(temp2, temp3, value_offset); + __ Bnec(out, temp2, &return_false); + __ Daddiu(TMP, TMP, 8); + __ Daddiu(temp3, temp3, 8); + __ Addiu(temp1, temp1, -4); + __ Bgtzc(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst64(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst64(out, 0); + __ Bind(&end); +} + static void GenerateStringIndexOf(HInvoke* invoke, Mips64Assembler* assembler, CodeGeneratorMIPS64* codegen, @@ -1586,8 +1688,6 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(StringEquals) - UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 6fbb6823d6..5b89cfef5a 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,19 +119,10 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index), - may_become_unknown_(true) { + declaring_class_def_index_(declaring_class_def_index) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); - - if (ref_info->IsSingletonAndNotReturned()) { - // We try to track stores to singletons that aren't returned to eliminate the stores - // since values in singleton's fields cannot be killed due to aliasing. Those values - // can still be killed due to merging values since we don't build phi for merging heap - // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible. - may_become_unknown_ = false; - } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -148,21 +139,11 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } - // Returns true if this heap location's value may become unknown after it's - // set to a value, due to merge of values, or killed due to aliasing. - bool MayBecomeUnknown() const { - return may_become_unknown_; - } - void SetMayBecomeUnknown(bool val) { - may_become_unknown_ = val; - } - private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool may_become_unknown_; // value may become kUnknownHeapValue. 
DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -381,26 +362,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* field_access, - HInstruction* ref, - const FieldInfo& field_info, - bool is_store) { + void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - // A store of a value may be eliminated if all future loads for that value can be eliminated. - // For a value that's stored into a singleton field, the value will not be killed due - // to aliasing. However if the value is set in a block that doesn't post dominate the definition, - // the value may be killed due to merging later. Before we have post dominating info, we check - // if the store is in the same block as the definition just to be conservative. - if (is_store && - location->GetReferenceInfo()->IsSingletonAndNotReturned() && - field_access->GetBlock() != ref->GetBlock()) { - location->SetMayBecomeUnknown(true); - } + GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ -409,20 +377,20 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } @@ -464,9 +432,14 @@ class HeapLocationCollector : public HGraphVisitor { }; // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. +// A heap location can be set to kUnknownHeapValue when: +// - initially set a value. +// - killed due to aliasing, merging, invocation, or loop side effects. static HInstruction* const kUnknownHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1)); + // Default heap value after an allocation. +// A heap location can be set to that value right after an allocation. 
static HInstruction* const kDefaultHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2)); @@ -484,29 +457,17 @@ class LSEVisitor : public HGraphVisitor { kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), - removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), - substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), + removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)), singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - int block_id = block->GetBlockId(); - ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + // Populate the heap_values array for this block. // TODO: try to reuse the heap_values array from one predecessor if possible. if (block->IsLoopHeader()) { - // We do a single pass in reverse post order. For loops, use the side effects as a hint - // to see if the heap values should be killed. - if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - // Leave all values as kUnknownHeapValue. - } else { - // Inherit the values from pre-header. - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; - } - } + HandleLoopSideEffects(block); } else { MergePredecessorValues(block); } @@ -515,23 +476,34 @@ class LSEVisitor : public HGraphVisitor { // Remove recorded instructions that should be eliminated. void RemoveInstructions() { - size_t size = removed_instructions_.size(); - DCHECK_EQ(size, substitute_instructions_.size()); + size_t size = removed_loads_.size(); + DCHECK_EQ(size, substitute_instructions_for_loads_.size()); for (size_t i = 0; i < size; i++) { - HInstruction* instruction = removed_instructions_[i]; - DCHECK(instruction != nullptr); - HInstruction* substitute = substitute_instructions_[i]; - if (substitute != nullptr) { - // Keep tracing substitute till one that's not removed. - HInstruction* sub_sub = FindSubstitute(substitute); - while (sub_sub != substitute) { - substitute = sub_sub; - sub_sub = FindSubstitute(substitute); - } - instruction->ReplaceWith(substitute); + HInstruction* load = removed_loads_[i]; + DCHECK(load != nullptr); + DCHECK(load->IsInstanceFieldGet() || + load->IsStaticFieldGet() || + load->IsArrayGet()); + HInstruction* substitute = substitute_instructions_for_loads_[i]; + DCHECK(substitute != nullptr); + // Keep tracing substitute till one that's not removed. + HInstruction* sub_sub = FindSubstitute(substitute); + while (sub_sub != substitute) { + substitute = sub_sub; + sub_sub = FindSubstitute(substitute); } - instruction->GetBlock()->RemoveInstruction(instruction); + load->ReplaceWith(substitute); + load->GetBlock()->RemoveInstruction(load); } + + // At this point, stores in possibly_removed_stores_ can be safely removed. + size = possibly_removed_stores_.size(); + for (size_t i = 0; i < size; i++) { + HInstruction* store = possibly_removed_stores_[i]; + DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet()); + store->GetBlock()->RemoveInstruction(store); + } + // TODO: remove unnecessary allocations. 
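// Editorial sketch (not part of the patch): the "keep tracing substitute" loop in
// RemoveInstructions() above, reduced to following a replacement map until reaching
// a value that is not itself scheduled for replacement.
#include <unordered_map>

static int ResolveSubstitute(int value, const std::unordered_map<int, int>& substitutes) {
  for (auto it = substitutes.find(value); it != substitutes.end(); it = substitutes.find(value)) {
    value = it->second;  // the substitute was itself removed; keep following the chain
  }
  return value;
}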
// Eliminate instructions in singleton_new_instances_ that: // - don't have uses, @@ -541,6 +513,52 @@ class LSEVisitor : public HGraphVisitor { } private: + // If heap_values[index] is an instance field store, need to keep the store. + // This is necessary if a heap value is killed due to merging, or loop side + // effects (which is essentially merging also), since a load later from the + // location won't be eliminated. + void KeepIfIsStore(HInstruction* heap_value) { + if (heap_value == kDefaultHeapValue || + heap_value == kUnknownHeapValue || + !heap_value->IsInstanceFieldSet()) { + return; + } + auto idx = std::find(possibly_removed_stores_.begin(), + possibly_removed_stores_.end(), heap_value); + if (idx != possibly_removed_stores_.end()) { + // Make sure the store is kept. + possibly_removed_stores_.erase(idx); + } + } + + void HandleLoopSideEffects(HBasicBlock* block) { + DCHECK(block->IsLoopHeader()); + int block_id = block->GetBlockId(); + ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; + // We do a single pass in reverse post order. For loops, use the side effects as a hint + // to see if the heap values should be killed. + if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { + for (size_t i = 0; i < pre_header_heap_values.size(); i++) { + // heap value is killed by loop side effects, need to keep the last store. + KeepIfIsStore(pre_header_heap_values[i]); + } + if (kIsDebugBuild) { + // heap_values should all be kUnknownHeapValue that it is inited with. + for (size_t i = 0; i < heap_values.size(); i++) { + DCHECK_EQ(heap_values[i], kUnknownHeapValue); + } + } + } else { + // Inherit the values from pre-header. + for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + } + } + void MergePredecessorValues(HBasicBlock* block) { const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); if (predecessors.size() == 0) { @@ -548,16 +566,25 @@ class LSEVisitor : public HGraphVisitor { } ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { - HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i]; - if (value != kUnknownHeapValue) { + HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i]; + heap_values[i] = pred0_value; + if (pred0_value != kUnknownHeapValue) { for (size_t j = 1; j < predecessors.size(); j++) { - if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) { - value = kUnknownHeapValue; + HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i]; + if (pred_value != pred0_value) { + heap_values[i] = kUnknownHeapValue; break; } } } - heap_values[i] = value; + + if (heap_values[i] == kUnknownHeapValue) { + // Keep the last store in each predecessor since future loads cannot be eliminated. 
+ for (size_t j = 0; j < predecessors.size(); j++) { + ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()]; + KeepIfIsStore(pred_values[i]); + } + } } } @@ -616,21 +643,30 @@ class LSEVisitor : public HGraphVisitor { HInstruction* heap_value = heap_values[idx]; if (heap_value == kDefaultHeapValue) { HInstruction* constant = GetDefaultValue(instruction->GetType()); - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(constant); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(constant); heap_values[idx] = constant; return; } + if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) { + HInstruction* store = heap_value; + // This load must be from a singleton since it's from the same field + // that a "removed" store puts the value. That store must be to a singleton's field. + DCHECK(ref_info->IsSingleton()); + // Get the real heap value of the store. + heap_value = store->InputAt(1); + } if ((heap_value != kUnknownHeapValue) && // Keep the load due to possible I/F, J/D array aliasing. // See b/22538329 for details. (heap_value->GetType() == instruction->GetType())) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(heap_value); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(heap_value); TryRemovingNullCheck(instruction); return; } + // Load isn't eliminated. if (heap_value == kUnknownHeapValue) { // Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. @@ -662,51 +698,63 @@ class LSEVisitor : public HGraphVisitor { ArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool redundant_store = false; + bool same_value = false; + bool possibly_redundant = false; if (Equal(heap_value, value)) { // Store into the heap location with the same value. - redundant_store = true; + same_value = true; } else if (index != nullptr) { // For array element, don't eliminate stores since it can be easily aliased // with non-constant index. } else if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned() && - !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) { - // Store into a field of a singleton that's not returned. And that value cannot be - // killed due to merge. It's redundant since future loads will get the value - // set by this instruction. - Primitive::Type type = Primitive::kPrimVoid; - if (instruction->IsInstanceFieldSet()) { - type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType(); - } else if (instruction->IsStaticFieldSet()) { - type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType(); - } else { - DCHECK(false) << "Must be an instance/static field set instruction."; - } - if (value->GetType() != type) { - // I/F, J/D aliasing should not happen for fields. - DCHECK(Primitive::IsIntegralType(value->GetType())); - DCHECK(!Primitive::Is64BitType(value->GetType())); - DCHECK(Primitive::IsIntegralType(type)); - DCHECK(!Primitive::Is64BitType(type)); - // Keep the store since the corresponding load isn't eliminated due to different types. - // TODO: handle the different int types so that we can eliminate this store. - redundant_store = false; + ref_info->IsSingletonAndNotReturned()) { + // Store into a field of a singleton that's not returned. 
The value cannot be + // killed due to aliasing/invocation. It can be redundant since future loads can + // directly get the value set by this instruction. The value can still be killed due to + // merging or loop side effects. Stores whose values are killed due to merging/loop side + // effects later will be removed from possibly_removed_stores_ when that is detected. + possibly_redundant = true; + HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance(); + DCHECK(new_instance != nullptr); + if (new_instance->IsFinalizable()) { + // Finalizable objects escape globally. Need to keep the store. + possibly_redundant = false; } else { - redundant_store = true; + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + // instruction is a store in the loop so the loop must does write. + DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + + if (loop_info->IsLoopInvariant(original_ref, false)) { + DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); + // Keep the store since its value may be needed at the loop header. + possibly_redundant = false; + } else { + // The singleton is created inside the loop. Value stored to it isn't needed at + // the loop header. This is true for outer loops also. + } + } } - // TODO: eliminate the store if the singleton object is not finalizable. - redundant_store = false; } - if (redundant_store) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(nullptr); - TryRemovingNullCheck(instruction); + if (same_value || possibly_redundant) { + possibly_removed_stores_.push_back(instruction); } - heap_values[idx] = value; + if (!same_value) { + if (possibly_redundant) { + DCHECK(instruction->IsInstanceFieldSet()); + // Put the store as the heap value. If the value is loaded from heap + // by a load later, this store isn't really redundant. + heap_values[idx] = instruction; + } else { + heap_values[idx] = value; + } + } // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { + if (i == idx) { + continue; + } if (heap_values[i] == value) { // Same value should be kept even if aliasing happens. continue; @@ -834,9 +882,10 @@ class LSEVisitor : public HGraphVisitor { return; } if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned()) { - // The allocation might be eliminated. - singleton_new_instances_.push_back(new_instance); + ref_info->IsSingletonAndNotReturned() && + !new_instance->IsFinalizable() && + !new_instance->CanThrow()) { + // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination. } ArenaVector<HInstruction*>& heap_values = heap_values_for_[new_instance->GetBlock()->GetBlockId()]; @@ -854,10 +903,10 @@ class LSEVisitor : public HGraphVisitor { // Find an instruction's substitute if it should be removed. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { - size_t size = removed_instructions_.size(); + size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { - if (removed_instructions_[i] == instruction) { - return substitute_instructions_[i]; + if (removed_loads_[i] == instruction) { + return substitute_instructions_for_loads_[i]; } } return instruction; @@ -871,8 +920,13 @@ class LSEVisitor : public HGraphVisitor { // We record the instructions that should be eliminated but may be // used by heap locations. 
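// Editorial sketch (simplified, not ART code): the decision made by the store-handling
// logic above for a field store into a singleton, with the checks reduced to booleans
// (array stores and the MayDeoptimize() case are omitted for brevity).
static bool StoreIsPossiblyRedundant(bool singleton_not_returned,
                                     bool finalizable,
                                     bool in_loop,
                                     bool singleton_created_in_loop) {
  if (!singleton_not_returned || finalizable) {
    return false;  // finalizable objects escape globally: keep the store
  }
  if (in_loop && !singleton_created_in_loop) {
    return false;  // the value may be needed at the loop header: keep the store
  }
  return true;     // candidate; still kept later if the heap value is killed by a merge
}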
They'll be removed in the end. - ArenaVector<HInstruction*> removed_instructions_; - ArenaVector<HInstruction*> substitute_instructions_; + ArenaVector<HInstruction*> removed_loads_; + ArenaVector<HInstruction*> substitute_instructions_for_loads_; + + // Stores in this list may be removed from the list later when it's + // found that the store cannot be eliminated. + ArenaVector<HInstruction*> possibly_removed_stores_; + ArenaVector<HInstruction*> singleton_new_instances_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 73a44ee2cb..0a39ff31bf 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2068,6 +2068,19 @@ void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { } } +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit: + return os << "explicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit: + return os << "implicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone: + return os << "none"; + default: + return os << "unknown:" << static_cast<int>(rhs); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 1da2a1dfd0..4f894b07c7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -3434,14 +3434,19 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(had_current_method_input || !needs_current_method_input); if (had_current_method_input && !needs_current_method_input) { - DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); - RemoveInputAt(GetCurrentMethodInputIndex()); + DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetSpecialInputIndex()); } dispatch_info_ = dispatch_info; } - void InsertInputAt(size_t index, HInstruction* input); - void RemoveInputAt(size_t index); + void AddSpecialInput(HInstruction* input) { + // We allow only one special input. + DCHECK(!IsStringInit() && !HasCurrentMethodInput()); + DCHECK(InputCount() == GetSpecialInputIndex() || + (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck())); + InsertInputAt(GetSpecialInputIndex(), input); + } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. @@ -3453,13 +3458,20 @@ class HInvokeStaticOrDirect : public HInvoke { return return_type_ == Primitive::kPrimNot && !IsStringInit(); } + // Get the index of the special input, if any. + // + // If the invoke IsStringInit(), it initially has a HFakeString special argument + // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(), + // the "special input" is the current method pointer; otherwise there may be one + // platform-specific special input, such as PC-relative addressing base. 
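As a hedged aside on the special-input convention documented above (all types here are invented for illustration), the key property is simply that the optional special input always sits at index GetNumberOfArguments(), directly after the call arguments.

#include <cassert>
#include <cstddef>
#include <vector>

struct Value {};  // Stand-in for an IR instruction.

// Simplified invoke node: the call arguments come first; at most one optional "special"
// input (current method pointer, PC-relative base, string-init fake argument) may follow.
class Invoke {
 public:
  explicit Invoke(std::vector<Value*> arguments)
      : num_arguments_(arguments.size()), inputs_(std::move(arguments)) {}

  size_t GetSpecialInputIndex() const { return num_arguments_; }
  bool HasSpecialInput() const { return inputs_.size() > GetSpecialInputIndex(); }

  void AddSpecialInput(Value* input) {
    assert(!HasSpecialInput());  // Only one special input is allowed.
    inputs_.insert(inputs_.begin() + GetSpecialInputIndex(), input);
  }

 private:
  size_t num_arguments_;
  std::vector<Value*> inputs_;
};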
+ uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } + InvokeType GetInvokeType() const { return invoke_type_; } MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; @@ -3467,11 +3479,11 @@ class HInvokeStaticOrDirect : public HInvoke { bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. if (NeedsCurrentMethodInput(GetMethodLoadKind())) { - DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return true; } else { - DCHECK(InputCount() == GetCurrentMethodInputIndex() || - !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputCount() == GetSpecialInputIndex() || + !InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return false; } } @@ -3505,20 +3517,19 @@ class HInvokeStaticOrDirect : public HInvoke { return GetInvokeType() == kStatic; } - // Remove the art::HLoadClass instruction set as last input by - // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of - // the initial art::HClinitCheck instruction (only relevant for - // static calls with explicit clinit check). - void RemoveLoadClassAsLastInput() { + // Remove the HClinitCheck or the replacement HLoadClass (set as last input by + // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck) + // instruction; only relevant for static calls with explicit clinit check. + void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) { DCHECK(IsStaticWithExplicitClinitCheck()); size_t last_input_index = InputCount() - 1; HInstruction* last_input = InputAt(last_input_index); DCHECK(last_input != nullptr); - DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName(); RemoveAsUserOfInput(last_input_index); inputs_.pop_back(); - clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; - DCHECK(IsStaticWithImplicitClinitCheck()); + clinit_check_requirement_ = new_requirement; + DCHECK(!IsStaticWithExplicitClinitCheck()); } bool IsStringFactoryFor(HFakeString* str) const { @@ -3539,7 +3550,7 @@ class HInvokeStaticOrDirect : public HInvoke { } // Is this a call to a static method whose declaring class has an - // explicit intialization check in the graph? + // explicit initialization check in the graph? 
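A rough toy model of the clinit-check merging performed further down in this change by PrepareForRegisterAllocation: the first static invoke that can absorb the explicit HClinitCheck keeps an implicit requirement (it will initialize the class itself), while the other dominated static invokes drop the requirement entirely. Only the enum values mirror the diff; everything else is invented for illustration.

#include <vector>

enum class ClinitCheckRequirement { kNone, kImplicit, kExplicit };

struct StaticInvoke {
  ClinitCheckRequirement requirement = ClinitCheckRequirement::kExplicit;
};

// `users` are the static invokes that currently carry an explicit check, with the chosen
// merge target (if any) first.
void MergeClinitCheckIntoInvokes(std::vector<StaticInvoke*>& users) {
  bool merged = false;
  for (StaticInvoke* invoke : users) {
    if (invoke->requirement != ClinitCheckRequirement::kExplicit) {
      continue;
    }
    invoke->requirement = merged ? ClinitCheckRequirement::kNone
                                 : ClinitCheckRequirement::kImplicit;
    merged = true;
  }
}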
bool IsStaticWithExplicitClinitCheck() const { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); } @@ -3572,6 +3583,9 @@ class HInvokeStaticOrDirect : public HInvoke { return input_record; } + void InsertInputAt(size_t index, HInstruction* input); + void RemoveInputAt(size_t index); + private: const InvokeType invoke_type_; ClinitCheckRequirement clinit_check_requirement_; @@ -3583,6 +3597,7 @@ class HInvokeStaticOrDirect : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual : public HInvoke { public: @@ -3643,10 +3658,14 @@ class HNewInstance : public HExpression<1> { uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, + bool can_throw, + bool finalizable, QuickEntrypointEnum entrypoint) : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc), type_index_(type_index), dex_file_(dex_file), + can_throw_(can_throw), + finalizable_(finalizable), entrypoint_(entrypoint) { SetRawInputAt(0, current_method); } @@ -3656,11 +3675,13 @@ class HNewInstance : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } - // It may throw when called on: - // - interfaces - // - abstract/innaccessible/unknown classes - // TODO: optimize when possible. - bool CanThrow() const OVERRIDE { return true; } + + // It may throw when called on type that's not instantiable/accessible. + // It can throw OOME. + // TODO: distinguish between the two cases so we can for example allow allocation elimination. + bool CanThrow() const OVERRIDE { return can_throw_ || true; } + + bool IsFinalizable() const { return finalizable_; } bool CanBeNull() const OVERRIDE { return false; } @@ -3671,6 +3692,8 @@ class HNewInstance : public HExpression<1> { private: const uint16_t type_index_; const DexFile& dex_file_; + const bool can_throw_; + const bool finalizable_; const QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 27ee47296c..2204921c53 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -383,10 +383,11 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } -// Read barrier are supported only on x86 and x86-64 at the moment. +// Read barrier are supported only on ARM, x86 and x86-64 at the moment. // TODO: Add support for other architectures and remove this function static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { - return instruction_set == kX86 + return instruction_set == kThumb2 + || instruction_set == kX86 || instruction_set == kX86_64; } @@ -668,8 +669,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); - // Always use the thumb2 assembler: some runtime functionality (like implicit stack - // overflow checks) assume thumb2. + // Always use the Thumb-2 assembler: some runtime functionality + // (like implicit stack overflow checks) assume Thumb-2. 
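The store and allocation elimination above only fires for a NewInstance whose reference is a non-returned singleton and which is neither finalizable nor able to throw, and only when no deoptimization can observe the object. A hedged sketch of that eligibility gate with plain flags in place of the optimizing-compiler classes:

// Hypothetical, simplified model of the eligibility check; not the ART implementation.
struct NewInstanceInfo {
  bool is_singleton;    // The reference never aliases another heap reference.
  bool is_returned;     // The reference escapes through a return.
  bool is_finalizable;  // A finalizer can observe the object, so it escapes globally.
  bool can_throw;       // The allocation itself may throw.
  bool may_deoptimize;  // A deopt point could need the object's state.
};

bool CanEliminateAllocation(const NewInstanceInfo& info) {
  return info.is_singleton &&
         !info.is_returned &&
         !info.is_finalizable &&
         !info.can_throw &&
         !info.may_deoptimize;
}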
if (instruction_set == kArm) { instruction_set = kThumb2; } diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index c2894c7338..808a1dc6c2 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -113,9 +113,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { InitializePCRelativeBasePointer(invoke); // Add the extra parameter base_. - uint32_t index = invoke_static_or_direct->GetCurrentMethodInputIndex(); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); - invoke_static_or_direct->InsertInputAt(index, base_); + invoke_static_or_direct->AddSpecialInput(base_); } // Ensure that we can load FP arguments from the constant area. for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index ca928ae0f2..f3d075caaa 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,12 +48,46 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - HLoadClass* cls = check->GetLoadClass(); - check->ReplaceWith(cls); - if (check->GetPrevious() == cls) { + // Try to find a static invoke from which this check originated. + HInvokeStaticOrDirect* invoke = nullptr; + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) { + invoke = user->AsInvokeStaticOrDirect(); + DCHECK(invoke->IsStaticWithExplicitClinitCheck()); + invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + break; + } + } + // If we found a static invoke for merging, remove the check from all other static invokes. + if (invoke != nullptr) { + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(invoke->StrictlyDominates(user)); // All other uses must be dominated. + it.Advance(); // Advance before we remove the node, reference to the next node is preserved. + if (user->IsInvokeStaticOrDirect()) { + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + } + } + } + + HLoadClass* load_class = check->GetLoadClass(); + bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check); + + check->ReplaceWith(load_class); + + if (invoke != nullptr) { + // Remove the check from the graph. It has been merged into the invoke. + check->GetBlock()->RemoveInstruction(check); + // Check if we can merge the load class as well. + if (can_merge_with_load_class && !load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } else if (can_merge_with_load_class) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. - cls->SetMustGenerateClinitCheck(true); + load_class->SetMustGenerateClinitCheck(true); check->GetBlock()->RemoveInstruction(check); } } @@ -86,30 +120,60 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. 
It is " << last_input->DebugName(); - // Remove a load class instruction as last input of a static - // invoke, which has been added (along with a clinit check, - // removed by PrepareForRegisterAllocation::VisitClinitCheck - // previously) by the graph builder during the creation of the - // static invoke instruction, but is no longer required at this - // stage (i.e., after inlining has been performed). - invoke->RemoveLoadClassAsLastInput(); - - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { - last_input->SetMustGenerateClinitCheck(false); - - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses()) { - last_input->GetBlock()->RemoveInstruction(last_input); - } + // Detach the explicit class initialization check from the invoke. + // Keeping track of the initializing instruction is no longer required + // at this stage (i.e., after inlining has been performed). + invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + + // Merging with load class should have happened in VisitClinitCheck(). + DCHECK(!CanMoveClinitCheck(last_input, invoke)); + } +} + +bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) { + // Determine if input and user come from the same dex instruction, so that we can move + // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) + // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). + + // Start with a quick dex pc check. + if (user->GetDexPc() != input->GetDexPc()) { + return false; + } + + // Now do a thorough environment check that this is really coming from the same instruction in + // the same inlined graph. Unfortunately, we have to go through the whole environment chain. + HEnvironment* user_environment = user->GetEnvironment(); + HEnvironment* input_environment = input->GetEnvironment(); + while (user_environment != nullptr || input_environment != nullptr) { + if (user_environment == nullptr || input_environment == nullptr) { + // Different environment chain length. This happens when a method is called + // once directly and once indirectly through another inlined method. + return false; + } + if (user_environment->GetDexPc() != input_environment->GetDexPc() || + user_environment->GetMethodIdx() != input_environment->GetMethodIdx() || + !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) { + return false; + } + user_environment = user_environment->GetParent(); + input_environment = input_environment->GetParent(); + } + + // Check for code motion taking the input to a different block. + if (user->GetBlock() != input->GetBlock()) { + return false; + } + + // In debug mode, check that we have not inserted a throwing instruction + // or an instruction with side effects between input and user. 
+ if (kIsDebugBuild) { + for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { + CHECK(between != nullptr); // User must be after input in the same block. + CHECK(!between->CanThrow()); + CHECK(!between->HasSideEffects()); } } + return true; } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index d7f277fa0d..a70fb309df 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -41,6 +41,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); + DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 68cf6d9233..89c2a7cbdf 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1231,6 +1231,7 @@ class Dex2Oat FINAL { // Handle and ClassLoader creation needs to come after Runtime::Create jobject class_loader = nullptr; + jobject class_path_class_loader = nullptr; Thread* self = Thread::Current(); if (!boot_image_option_.empty()) { @@ -1248,10 +1249,12 @@ class Dex2Oat FINAL { key_value_store_->Put(OatHeader::kClassPathKey, OatFile::EncodeDexFileDependencies(class_path_files)); - // Then the dex files we'll compile. Thus we'll resolve the class-path first. - class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end()); + class_path_class_loader = class_linker->CreatePathClassLoader(self, + class_path_files, + nullptr); - class_loader = class_linker->CreatePathClassLoader(self, class_path_files); + // Class path loader as parent so that we'll resolve there first. + class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader); } driver_.reset(new CompilerDriver(compiler_options_.get(), diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index cd83de6265..94eb82b054 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -2412,7 +2412,7 @@ static int DumpOatWithRuntime(Runtime* runtime, OatFile* oat_file, OatDumperOpti // Need a class loader. // Fake that we're a compiler. - jobject class_loader = class_linker->CreatePathClassLoader(self, class_path); + jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr); // Use the class loader while dumping. 
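CanMoveClinitCheck() above decides whether two instructions originate from the same dex instruction by first comparing dex pcs and then walking their inlining environment chains in parallel. A self-contained sketch of that walk (the fields are simplified; ART's HEnvironment carries more state):

#include <cstdint>
#include <string>

// One node per level of inlining; `parent` is the caller's environment, null at the top.
struct Environment {
  uint32_t dex_pc;
  uint32_t method_idx;
  std::string dex_file;  // Stand-in for a DexFile identity.
  const Environment* parent;
};

// True only if both chains have the same length and agree at every level.
bool SameInlinedOrigin(const Environment* a, const Environment* b) {
  while (a != nullptr || b != nullptr) {
    if (a == nullptr || b == nullptr) {
      return false;  // Different chain lengths, e.g. inlined via different call paths.
    }
    if (a->dex_pc != b->dex_pc ||
        a->method_idx != b->method_idx ||
        a->dex_file != b->dex_file) {
      return false;
    }
    a = a->parent;
    b = b->parent;
  }
  return true;
}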
StackHandleScope<1> scope(self); diff --git a/runtime/Android.mk b/runtime/Android.mk index 1fdffe3e17..0b0f0942a3 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -60,6 +60,7 @@ LIBART_COMMON_SRC_FILES := \ gc/collector/concurrent_copying.cc \ gc/collector/garbage_collector.cc \ gc/collector/immune_region.cc \ + gc/collector/immune_spaces.cc \ gc/collector/mark_compact.cc \ gc/collector/mark_sweep.cc \ gc/collector/partial_mark_sweep.cc \ diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index dde100125a..2dd2a83888 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -322,7 +322,8 @@ ClassLinker::ClassLinker(InternTable* intern_table) std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr)); } -void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) { +bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path, + std::string* error_msg) { VLOG(startup) << "ClassLinker::Init"; Thread* const self = Thread::Current(); @@ -477,9 +478,15 @@ void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> b // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses // these roots. - CHECK_NE(0U, boot_class_path.size()); + if (boot_class_path.empty()) { + *error_msg = "Boot classpath is empty."; + return false; + } for (auto& dex_file : boot_class_path) { - CHECK(dex_file.get() != nullptr); + if (dex_file.get() == nullptr) { + *error_msg = "Null dex file."; + return false; + } AppendToBootClassPath(self, *dex_file); opened_dex_files_.push_back(std::move(dex_file)); } @@ -660,6 +667,8 @@ void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> b FinishInit(self); VLOG(startup) << "ClassLinker::InitFromCompiler exiting"; + + return true; } void ClassLinker::FinishInit(Thread* self) { @@ -850,7 +859,7 @@ class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor { DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor); }; -void ClassLinker::InitFromImage() { +bool ClassLinker::InitFromImage(std::string* error_msg) { VLOG(startup) << "ClassLinker::InitFromImage entering"; CHECK(!init_done_); @@ -895,22 +904,32 @@ void ClassLinker::InitFromImage() { java_lang_Object->GetObjectSize(), VoidFunctor())); - CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(), - static_cast<uint32_t>(dex_caches->GetLength())); + if (oat_file->GetOatHeader().GetDexFileCount() != + static_cast<uint32_t>(dex_caches->GetLength())) { + *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from " + "image"; + return false; + } for (int32_t i = 0; i < dex_caches->GetLength(); i++) { StackHandleScope<1> hs2(self); Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i))); const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8()); const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(), nullptr); - CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location; - std::string error_msg; - std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg); + if (oat_dex_file == nullptr) { + *error_msg = StringPrintf("Failed finding oat dex file for %s %s", + oat_file->GetLocation().c_str(), + dex_file_location.c_str()); + return false; + } + std::string 
inner_error_msg; + std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg); if (dex_file == nullptr) { - LOG(FATAL) << "Failed to open dex file " << dex_file_location - << " from within oat file " << oat_file->GetLocation() - << " error '" << error_msg << "'"; - UNREACHABLE(); + *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'", + dex_file_location.c_str(), + oat_file->GetLocation().c_str(), + inner_error_msg.c_str()); + return false; } if (kSanityCheckObjects) { @@ -920,13 +939,22 @@ void ClassLinker::InitFromImage() { space); } - CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum()); + if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) { + *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x", + dex_file_location.c_str(), + dex_file->GetLocationChecksum(), + oat_dex_file->GetDexFileLocationChecksum()); + return false; + } AppendToBootClassPath(*dex_file.get(), dex_cache); opened_dex_files_.push_back(std::move(dex_file)); } - CHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_; + if (!ValidPointerSize(image_pointer_size_)) { + *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_); + return false; + } // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live // bitmap walk. @@ -934,7 +962,12 @@ void ClassLinker::InitFromImage() { // Only the Aot compiler supports having an image with a different pointer size than the // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps. - CHECK_EQ(image_pointer_size_, sizeof(void*)); + if (image_pointer_size_ != sizeof(void*)) { + *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu", + image_pointer_size_ , + sizeof(void*)); + return false; + } } if (kSanityCheckObjects) { @@ -987,6 +1020,8 @@ void ClassLinker::InitFromImage() { FinishInit(self); VLOG(startup) << "ClassLinker::InitFromImage exiting"; + + return true; } bool ClassLinker::ClassInClassTable(mirror::Class* klass) { @@ -6594,7 +6629,9 @@ bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) { } } -jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) { +jobject ClassLinker::CreatePathClassLoader(Thread* self, + std::vector<const DexFile*>& dex_files, + jobject parent_loader) { // SOAAlreadyRunnable is protected, and we need something to add a global reference. // We could move the jobject to the callers, but all call-sites do this... ScopedObjectAccessUnchecked soa(self); @@ -6625,8 +6662,8 @@ jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFi for (const DexFile* dex_file : dex_files) { StackHandleScope<3> hs2(self); - // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the - // oat file but we can leave it null. + // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is + // supposed to be the oat file but we can leave it null. 
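The ClassLinker changes above convert fatal CHECKs during image-based initialization into recoverable failures reported through an out-parameter, so callers can handle the error instead of aborting. A generic sketch of the pattern using only the standard library (ART itself formats the message with StringPrintf):

#include <cstdint>
#include <sstream>
#include <string>

// Returns true on success; on failure fills *error_msg and lets the caller decide what to do.
bool CheckDexChecksum(uint32_t dex_checksum, uint32_t oat_checksum, std::string* error_msg) {
  if (dex_checksum != oat_checksum) {
    std::ostringstream oss;
    oss << "Checksums do not match: " << std::hex << dex_checksum << " vs " << oat_checksum;
    *error_msg = oss.str();
    return false;
  }
  return true;
}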
Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc( self, kDexFileIndexStart + 1)); @@ -6672,9 +6709,10 @@ jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFi mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent", "Ljava/lang/ClassLoader;"); DCHECK(parent_field != nullptr); - mirror::Object* boot_cl = - soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self); - parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl); + mirror::Object* parent = (parent_loader != nullptr) + ? soa.Decode<mirror::ClassLoader*>(parent_loader) + : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self); + parent_field->SetObject<false>(h_path_class_loader.Get(), parent); // Make it a global ref and return. ScopedLocalRef<jobject> local_ref( diff --git a/runtime/class_linker.h b/runtime/class_linker.h index a72b58602f..29aac312c1 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -115,12 +115,15 @@ class ClassLinker { ~ClassLinker(); // Initialize class linker by bootstraping from dex files. - void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) + bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path, + std::string* error_msg) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); // Initialize class linker from one or more images. - void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); + bool InitFromImage(std::string* error_msg) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!dex_lock_); // Finds a class by its descriptor, loading it if necessary. // If class_loader is null, searches boot_class_path_. @@ -511,7 +514,10 @@ class ClassLinker { // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files. // Note: the objects are not completely set up. Do not use this outside of tests and the compiler. - jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) + // If parent_loader is null then we use the boot class loader. + jobject CreatePathClassLoader(Thread* self, + std::vector<const DexFile*>& dex_files, + jobject parent_loader) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index b6b514177a..f705a50d55 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -553,7 +553,8 @@ jobject CommonRuntimeTest::LoadDex(const char* dex_name) { Thread* self = Thread::Current(); jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self, - class_path); + class_path, + nullptr); self->SetClassLoaderOverride(class_loader); return class_loader; } diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index f4cf3ae260..bcfcb89e62 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -134,10 +134,10 @@ void ConcurrentCopying::BindBitmaps() { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); // Mark all of the spaces we never collect as immune. 
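dex2oat above now creates the class-path class loader separately and installs it as the parent of the loader for the dex files being compiled, so with the usual parent-first delegation resolution still hits the class path first. A sketch of that lookup order with invented minimal types:

#include <string>
#include <unordered_set>

// Hypothetical stand-in for a PathClassLoader: a set of class descriptors plus a parent.
struct Loader {
  std::unordered_set<std::string> classes;
  const Loader* parent = nullptr;
};

// Parent-first delegation: ask the parent chain before looking at our own dex files.
const Loader* FindDefiningLoader(const Loader* loader, const std::string& descriptor) {
  if (loader == nullptr) {
    return nullptr;  // Topmost level reached; a real chain ends in the boot class loader.
  }
  if (const Loader* in_parent = FindDefiningLoader(loader->parent, descriptor)) {
    return in_parent;
  }
  return loader->classes.count(descriptor) != 0 ? loader : nullptr;
}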
for (const auto& space : heap_->GetContinuousSpaces()) { - if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect - || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { + if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || + space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { CHECK(space->IsZygoteSpace() || space->IsImageSpace()); - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" : "cc zygote space bitmap"; // TODO: try avoiding using bitmaps for image/zygote to save space. @@ -164,7 +164,7 @@ void ConcurrentCopying::InitializePhase() { << reinterpret_cast<void*>(region_space_->Limit()); } CheckEmptyMarkStack(); - immune_region_.Reset(); + immune_spaces_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit || @@ -177,7 +177,11 @@ void ConcurrentCopying::InitializePhase() { BindBitmaps(); if (kVerboseMode) { LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_; - LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End(); + LOG(INFO) << "Largest immune region: " << immune_spaces_.GetLargestImmuneRegion().Begin() + << "-" << immune_spaces_.GetLargestImmuneRegion().End(); + for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { + LOG(INFO) << "Immune space: " << *space; + } LOG(INFO) << "GC end of InitializePhase"; } } @@ -300,7 +304,7 @@ class ConcurrentCopyingImmuneSpaceObjVisitor { void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) SHARED_REQUIRES(Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); - DCHECK(collector_->immune_region_.ContainsObject(obj)); + DCHECK(collector_->immune_spaces_.ContainsObject(obj)); accounting::ContinuousSpaceBitmap* cc_bitmap = collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); DCHECK(cc_bitmap != nullptr) @@ -383,15 +387,13 @@ void ConcurrentCopying::MarkingPhase() { } // Immune spaces. - for (auto& space : heap_->GetContinuousSpaces()) { - if (immune_region_.ContainsSpace(space)) { - DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); - accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); - ConcurrentCopyingImmuneSpaceObjVisitor visitor(this); - live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), - reinterpret_cast<uintptr_t>(space->Limit()), - visitor); - } + for (auto& space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); + ConcurrentCopyingImmuneSpaceObjVisitor visitor(this); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), + reinterpret_cast<uintptr_t>(space->Limit()), + visitor); } Thread* self = Thread::Current(); @@ -1078,7 +1080,7 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) { // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We // will change it to black or white later in ReferenceQueue::DequeuePendingReference(). 
- CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; + DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; } else { // We may occasionally leave a Reference black or white in the queue if its referent happens to // be concurrently marked after the Scan() call above has enqueued the Reference, in which case @@ -1087,9 +1089,10 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { if (kUseBakerReadBarrier) { if (region_space_->IsInToSpace(to_ref)) { // If to-space, change from gray to white. - bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::WhitePtr()); - CHECK(success) << "Must succeed as we won the race."; + bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>( + ReadBarrier::GrayPtr(), + ReadBarrier::WhitePtr()); + DCHECK(success) << "Must succeed as we won the race."; DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); } else { // If non-moving space/unevac from space, change from gray @@ -1099,9 +1102,10 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { // indicate non-moving objects that have been marked // through. Note we'd need to change from black to white // later (concurrently). - bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::BlackPtr()); - CHECK(success) << "Must succeed as we won the race."; + bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>( + ReadBarrier::GrayPtr(), + ReadBarrier::BlackPtr()); + DCHECK(success) << "Must succeed as we won the race."; DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()); } } @@ -1205,7 +1209,7 @@ void ConcurrentCopying::Sweep(bool swap_bitmaps) { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); - if (space == region_space_ || immune_region_.ContainsSpace(space)) { + if (space == region_space_ || immune_spaces_.ContainsSpace(space)) { continue; } TimingLogger::ScopedTiming split2( @@ -1225,9 +1229,6 @@ class ConcurrentCopyingClearBlackPtrsVisitor { public: explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc) : collector_(cc) {} -#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER - NO_RETURN -#endif void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) SHARED_REQUIRES(Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); @@ -1507,8 +1508,8 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset } } else { // In a non-moving space. 
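The concurrent copying changes above turn the gray-to-white and gray-to-black read-barrier-pointer updates into a CAS with release ordering. A standalone sketch of such a transition with std::atomic (the color constants are placeholders, not ART's ReadBarrier values):

#include <atomic>
#include <cstdint>

constexpr uint32_t kWhite = 0;
constexpr uint32_t kGray = 1;

struct Object {
  std::atomic<uint32_t> read_barrier_state{kGray};
};

// Succeeds only if the object is still gray; release ordering on success publishes the
// writes made while the object was gray, and the failure case needs no ordering.
bool TrySetGrayToWhite(Object* obj) {
  uint32_t expected = kGray;
  return obj->read_barrier_state.compare_exchange_strong(
      expected, kWhite, std::memory_order_release, std::memory_order_relaxed);
}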
- if (immune_region_.ContainsObject(obj)) { - LOG(INFO) << "holder is in the image or the zygote space."; + if (immune_spaces_.ContainsObject(obj)) { + LOG(INFO) << "holder is in an immune image or the zygote space."; accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); CHECK(cc_bitmap != nullptr) @@ -1519,7 +1520,7 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset LOG(INFO) << "holder is NOT marked in the bit map."; } } else { - LOG(INFO) << "holder is in a non-moving (or main) space."; + LOG(INFO) << "holder is in a non-immune, non-moving (or main) space."; accounting::ContinuousSpaceBitmap* mark_bitmap = heap_mark_bitmap_->GetContinuousSpaceBitmap(obj); accounting::LargeObjectBitmap* los_bitmap = @@ -1547,7 +1548,7 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, mirror::Object* ref) { // In a non-moving spaces. Check that the ref is marked. - if (immune_region_.ContainsObject(ref)) { + if (immune_spaces_.ContainsObject(ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); CHECK(cc_bitmap != nullptr) @@ -1932,7 +1933,7 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { } } else { // from_ref is in a non-moving space. - if (immune_region_.ContainsObject(from_ref)) { + if (immune_spaces_.ContainsObject(from_ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref); DCHECK(cc_bitmap != nullptr) @@ -1986,7 +1987,7 @@ bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) { mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) { // ref is in a non-moving space (from_ref == to_ref). DCHECK(!region_space_->HasAddress(ref)) << ref; - if (immune_region_.ContainsObject(ref)) { + if (immune_spaces_.ContainsObject(ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); DCHECK(cc_bitmap != nullptr) diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 27726e23c1..5d21c599e4 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -19,7 +19,7 @@ #include "barrier.h" #include "garbage_collector.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "jni.h" #include "object_callbacks.h" #include "offsets.h" @@ -200,7 +200,7 @@ class ConcurrentCopying : public GarbageCollector { bool is_marking_; // True while marking is ongoing. bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. - ImmuneRegion immune_region_; + ImmuneSpaces immune_spaces_; std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_; std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_; accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_; diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc index 3e1c944302..8a04c178b5 100644 --- a/runtime/gc/collector/immune_region.cc +++ b/runtime/gc/collector/immune_region.cc @@ -32,39 +32,6 @@ void ImmuneRegion::Reset() { SetEnd(nullptr); } -bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) { - // Bind live to mark bitmap if necessary. 
- if (space->GetLiveBitmap() != space->GetMarkBitmap()) { - CHECK(space->IsContinuousMemMapAllocSpace()); - space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap(); - } - mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin()); - mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit()); - if (IsEmpty()) { - SetBegin(space_begin); - SetEnd(space_limit); - } else { - if (space_limit <= begin_) { // Space is before the immune region. - SetBegin(space_begin); - } else if (space_begin >= end_) { // Space is after the immune region. - SetEnd(space_limit); - } else { - return false; - } - } - return true; -} - -bool ImmuneRegion::ContainsSpace(const space::ContinuousSpace* space) const { - bool contains = - begin_ <= reinterpret_cast<mirror::Object*>(space->Begin()) && - end_ >= reinterpret_cast<mirror::Object*>(space->Limit()); - if (kIsDebugBuild && contains) { - // A bump pointer space shoult not be in the immune region. - DCHECK(space->GetType() != space::kSpaceTypeBumpPointerSpace); - } - return contains; -} } // namespace collector } // namespace gc diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h index 3ead501046..b60426daf0 100644 --- a/runtime/gc/collector/immune_region.h +++ b/runtime/gc/collector/immune_region.h @@ -39,35 +39,34 @@ namespace collector { class ImmuneRegion { public: ImmuneRegion(); + void Reset(); - bool AddContinuousSpace(space::ContinuousSpace* space) - REQUIRES(Locks::heap_bitmap_lock_); - bool ContainsSpace(const space::ContinuousSpace* space) const; + // Returns true if an object is inside of the immune region (assumed to be marked). - bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE { + ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const { // Note: Relies on integer underflow behavior. return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_; } + void SetBegin(mirror::Object* begin) { begin_ = begin; UpdateSize(); } + void SetEnd(mirror::Object* end) { end_ = end; UpdateSize(); } - mirror::Object* Begin() { + mirror::Object* Begin() const { return begin_; } - mirror::Object* End() { + + mirror::Object* End() const { return end_; } private: - bool IsEmpty() const { - return size_ == 0; - } void UpdateSize() { size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_); } diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc new file mode 100644 index 0000000000..8f9a9e294f --- /dev/null +++ b/runtime/gc/collector/immune_spaces.cc @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "immune_spaces.h" + +#include "gc/space/space-inl.h" +#include "mirror/object.h" + +namespace art { +namespace gc { +namespace collector { + +void ImmuneSpaces::Reset() { + spaces_.clear(); + largest_immune_region_.Reset(); +} + +void ImmuneSpaces::CreateLargestImmuneRegion() { + uintptr_t best_begin = 0u; + uintptr_t best_end = 0u; + uintptr_t cur_begin = 0u; + uintptr_t cur_end = 0u; + // TODO: If the last space is an image space, we may include its oat file in the immune region. + // This could potentially hide heap corruption bugs if there is invalid pointers that point into + // the boot oat code + for (space::ContinuousSpace* space : GetSpaces()) { + uintptr_t space_begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t space_end = reinterpret_cast<uintptr_t>(space->Limit()); + if (space->IsImageSpace()) { + // For the boot image, the boot oat file is always directly after. For app images it may not + // be if the app image was mapped at a random address. + space::ImageSpace* image_space = space->AsImageSpace(); + // Update the end to include the other non-heap sections. + space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize); + uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin()); + uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd()); + if (space_end == oat_begin) { + DCHECK_GE(oat_end, oat_begin); + space_end = oat_end; + } + } + if (cur_begin == 0u) { + cur_begin = space_begin; + cur_end = space_end; + } else if (cur_end == space_begin) { + // Extend current region. + cur_end = space_end; + } else { + // Reset. + cur_begin = 0; + cur_end = 0; + } + if (cur_end - cur_begin > best_end - best_begin) { + // Improvement, update the best range. + best_begin = cur_begin; + best_end = cur_end; + } + } + largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin)); + largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end)); +} + +void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) { + DCHECK(spaces_.find(space) == spaces_.end()) << *space; + // Bind live to mark bitmap if necessary. + if (space->GetLiveBitmap() != space->GetMarkBitmap()) { + CHECK(space->IsContinuousMemMapAllocSpace()); + space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap(); + } + spaces_.insert(space); + CreateLargestImmuneRegion(); +} + +bool ImmuneSpaces::CompareByBegin::operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) + const { + return a->Begin() < b->Begin(); +} + +bool ImmuneSpaces::ContainsSpace(space::ContinuousSpace* space) const { + return spaces_.find(space) != spaces_.end(); +} + +} // namespace collector +} // namespace gc +} // namespace art diff --git a/runtime/gc/collector/immune_spaces.h b/runtime/gc/collector/immune_spaces.h new file mode 100644 index 0000000000..72cb60d465 --- /dev/null +++ b/runtime/gc/collector/immune_spaces.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+#define ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/space/space.h"
+#include "immune_region.h"
+
+#include <set>
+
+namespace art {
+namespace gc {
+namespace space {
+class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+// ImmuneSpaces is a set of spaces which are not going to have any objects become marked during the
+// GC.
+class ImmuneSpaces {
+  class CompareByBegin {
+   public:
+    bool operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) const;
+  };
+
+ public:
+  ImmuneSpaces() {}
+  void Reset();
+
+  // Add a continuous space to the immune spaces set.
+  void AddSpace(space::ContinuousSpace* space) REQUIRES(Locks::heap_bitmap_lock_);
+
+  // Returns true if an object is inside of the immune region (assumed to be marked). Only returns
+  // true for the largest immune region. The object can still be inside of an immune space.
+  ALWAYS_INLINE bool IsInImmuneRegion(const mirror::Object* obj) const {
+    return largest_immune_region_.ContainsObject(obj);
+  }
+
+  // Returns true if the space is contained.
+  bool ContainsSpace(space::ContinuousSpace* space) const;
+
+  // Returns the set of immune spaces.
+  const std::set<space::ContinuousSpace*, CompareByBegin>& GetSpaces() {
+    return spaces_;
+  }
+
+  // Returns the associated largest immune region.
+  const ImmuneRegion& GetLargestImmuneRegion() const {
+    return largest_immune_region_;
+  }
+
+  // Returns true if the object is contained by any of the immune spaces.
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
+    if (largest_immune_region_.ContainsObject(obj)) {
+      return true;
+    }
+    for (space::ContinuousSpace* space : spaces_) {
+      if (space->HasAddress(obj)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
+  // Set up the immune region to cover the largest continuous set of immune spaces. The immune
+  // region is just for the fast-path lookup.
+  void CreateLargestImmuneRegion();
+
+  std::set<space::ContinuousSpace*, CompareByBegin> spaces_;
+  ImmuneRegion largest_immune_region_;
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
new file mode 100644
index 0000000000..f741117bc1
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "common_runtime_test.h" +#include "gc/collector/immune_spaces.h" +#include "gc/space/image_space.h" +#include "gc/space/space-inl.h" +#include "oat_file.h" +#include "thread-inl.h" + +namespace art { +namespace mirror { +class Object; +} // namespace mirror +namespace gc { +namespace collector { + +class ImmuneSpacesTest : public CommonRuntimeTest {}; + +class DummySpace : public space::ContinuousSpace { + public: + DummySpace(uint8_t* begin, uint8_t* end) + : ContinuousSpace("DummySpace", + space::kGcRetentionPolicyNeverCollect, + begin, + end, + /*limit*/end) {} + + space::SpaceType GetType() const OVERRIDE { + return space::kSpaceTypeMallocSpace; + } + + bool CanMoveObjects() const OVERRIDE { + return false; + } + + accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE { + return nullptr; + } + + accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE { + return nullptr; + } +}; + +TEST_F(ImmuneSpacesTest, AppendBasic) { + ImmuneSpaces spaces; + uint8_t* const base = reinterpret_cast<uint8_t*>(0x1000); + DummySpace a(base, base + 45 * KB); + DummySpace b(a.Limit(), a.Limit() + 813 * KB); + { + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + spaces.AddSpace(&a); + spaces.AddSpace(&b); + } + EXPECT_TRUE(spaces.ContainsSpace(&a)); + EXPECT_TRUE(spaces.ContainsSpace(&b)); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), a.Begin()); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit()); +} + +class DummyImageSpace : public space::ImageSpace { + public: + DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap) + : ImageSpace("DummyImageSpace", + /*image_location*/"", + map, + live_bitmap, + map->End()) {} + + // OatSize is how large the oat file is after the image. + static DummyImageSpace* Create(size_t size, size_t oat_size) { + std::string error_str; + std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace", + nullptr, + size, + PROT_READ | PROT_WRITE, + /*low_4gb*/true, + /*reuse*/false, + &error_str)); + if (map == nullptr) { + LOG(ERROR) << error_str; + return nullptr; + } + std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap( + accounting::ContinuousSpaceBitmap::Create("bitmap", map->Begin(), map->Size())); + if (live_bitmap == nullptr) { + return nullptr; + } + // Create image header. 
+ ImageSection sections[ImageHeader::kSectionCount]; + new (map->Begin()) ImageHeader( + /*image_begin*/PointerToLowMemUInt32(map->Begin()), + /*image_size*/map->Size(), + sections, + /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1, + /*oat_checksum*/0u, + /*oat_file_begin*/PointerToLowMemUInt32(map->End()), + /*oat_data_begin*/PointerToLowMemUInt32(map->End()), + /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size), + /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size), + /*pointer_size*/sizeof(void*), + /*compile_pic*/false); + return new DummyImageSpace(map.release(), live_bitmap.release()); + } +}; + +TEST_F(ImmuneSpacesTest, AppendAfterImage) { + ImmuneSpaces spaces; + constexpr size_t image_size = 123 * kPageSize; + constexpr size_t image_oat_size = 321 * kPageSize; + std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size)); + ASSERT_TRUE(image_space != nullptr); + const ImageHeader& image_header = image_space->GetImageHeader(); + EXPECT_EQ(image_header.GetImageSize(), image_size); + EXPECT_EQ(static_cast<size_t>(image_header.GetOatFileEnd() - image_header.GetOatFileBegin()), + image_oat_size); + DummySpace space(image_header.GetOatFileEnd(), image_header.GetOatFileEnd() + 813 * kPageSize); + EXPECT_NE(image_space->Limit(), space.Begin()); + { + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + spaces.AddSpace(image_space.get()); + spaces.AddSpace(&space); + } + EXPECT_TRUE(spaces.ContainsSpace(image_space.get())); + EXPECT_TRUE(spaces.ContainsSpace(&space)); + // CreateLargestImmuneRegion should have coalesced the two spaces since the oat code after the + // image prevents gaps. + // Check that we have a continuous region. + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), + image_space->Begin()); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit()); +} + +} // namespace collector +} // namespace gc +} // namespace art diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index f561764ce4..ce6467a6cf 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -45,7 +45,7 @@ void MarkCompact::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } @@ -115,7 +115,7 @@ void MarkCompact::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_; // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap. ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); @@ -148,7 +148,7 @@ inline mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) { // Verify all the objects have the correct forward pointer installed. obj->AssertReadBarrierPointer(); } - if (!immune_region_.ContainsObject(obj)) { + if (!immune_spaces_.IsInImmuneRegion(obj)) { if (objects_before_forwarding_->HasAddress(obj)) { if (!objects_before_forwarding_->Set(obj)) { MarkStackPush(obj); // This object was not previously marked. 
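ImmuneSpaces::CreateLargestImmuneRegion() above walks the spaces in begin-address order and coalesces adjacent [begin, limit) ranges (folding in a boot image's directly following oat file) into the largest contiguous run, which then backs the one-comparison fast-path containment check. A simplified, self-contained sketch of both pieces (the gap handling is condensed):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Range {
  uintptr_t begin;
  uintptr_t end;  // Exclusive limit.
};

// Single pass over the ranges sorted by begin: extend the current run while ranges touch,
// otherwise start over, and remember the largest run seen so far.
Range LargestContiguousRun(std::vector<Range> ranges) {
  std::sort(ranges.begin(), ranges.end(),
            [](const Range& a, const Range& b) { return a.begin < b.begin; });
  Range best{0, 0};
  Range cur{0, 0};
  for (const Range& r : ranges) {
    if (cur.begin == cur.end) {
      cur = r;                      // Nothing accumulated yet: start a run.
    } else if (cur.end == r.begin) {
      cur.end = r.end;              // Adjacent: extend the run.
    } else {
      cur = r;                      // Gap: restart from this range.
    }
    if (cur.end - cur.begin > best.end - best.begin) {
      best = cur;
    }
  }
  return best;
}

// Fast-path containment check: unsigned wrap-around lets one compare cover both bounds.
inline bool ContainsAddress(const Range& region, uintptr_t addr) {
  return addr - region.begin < region.end - region.begin;
}

The single comparison works because addr - region.begin wraps around for addresses below begin, which is the same underflow trick already noted in ImmuneRegion::ContainsObject().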
@@ -218,7 +218,7 @@ void MarkCompact::UpdateAndMarkModUnion() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); for (auto& space : heap_->GetContinuousSpaces()) { // If the space is immune then we need to mark the references to other spaces. - if (immune_region_.ContainsSpace(space)) { + if (immune_spaces_.ContainsSpace(space)) { accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); if (table != nullptr) { // TODO: Improve naming. @@ -475,7 +475,7 @@ inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) } mirror::Object* MarkCompact::IsMarked(mirror::Object* object) { - if (immune_region_.ContainsObject(object)) { + if (immune_spaces_.IsInImmuneRegion(object)) { return object; } if (updating_references_) { @@ -498,7 +498,7 @@ void MarkCompact::SweepSystemWeaks() { } bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const { - return space != space_ && !immune_region_.ContainsSpace(space); + return space != space_ && !immune_spaces_.ContainsSpace(space); } class MoveObjectVisitor { diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 8d91939057..8a12094168 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -26,7 +26,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "lock_word.h" #include "object_callbacks.h" #include "offsets.h" @@ -194,8 +194,8 @@ class MarkCompact : public GarbageCollector { accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune region is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. + ImmuneSpaces immune_spaces_; // Bump pointer space which we are collecting. space::BumpPointerSpace* space_; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index db516a0a87..5427f88563 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -86,7 +86,7 @@ void MarkSweep::BindBitmaps() { // Mark all of the spaces we never collect as immune. 
for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } @@ -115,7 +115,7 @@ void MarkSweep::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); no_reference_class_count_.StoreRelaxed(0); normal_count_.StoreRelaxed(0); class_count_.StoreRelaxed(0); @@ -268,16 +268,41 @@ void MarkSweep::MarkingPhase() { PreCleanCards(); } +class ScanObjectVisitor { + public: + explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE + : mark_sweep_(mark_sweep) {} + + void operator()(mirror::Object* obj) const + ALWAYS_INLINE + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (kCheckLocks) { + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); + } + mark_sweep_->ScanObject(obj); + } + + private: + MarkSweep* const mark_sweep_; +}; + void MarkSweep::UpdateAndMarkModUnion() { - for (const auto& space : heap_->GetContinuousSpaces()) { - if (immune_region_.ContainsSpace(space)) { - const char* name = space->IsZygoteSpace() - ? "UpdateAndMarkZygoteModUnionTable" - : "UpdateAndMarkImageModUnionTable"; - TimingLogger::ScopedTiming t(name, GetTimings()); - accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); - CHECK(mod_union_table != nullptr); + for (const auto& space : immune_spaces_.GetSpaces()) { + const char* name = space->IsZygoteSpace() + ? "UpdateAndMarkZygoteModUnionTable" + : "UpdateAndMarkImageModUnionTable"; + DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space; + TimingLogger::ScopedTiming t(name, GetTimings()); + accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); + if (mod_union_table != nullptr) { mod_union_table->UpdateAndMarkReferences(this); + } else { + // No mod-union table, scan all the live bits. This can only occur for app images. + space->GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), + reinterpret_cast<uintptr_t>(space->End()), + ScanObjectVisitor(this)); } } } @@ -460,7 +485,7 @@ inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, // Verify all the objects have the correct pointer installed. obj->AssertReadBarrierPointer(); } - if (immune_region_.ContainsObject(obj)) { + if (immune_spaces_.IsInImmuneRegion(obj)) { if (kCountMarkedObjects) { ++mark_immune_count_; } @@ -501,7 +526,7 @@ inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) { // Verify all the objects have the correct pointer installed. 
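MarkSweep::UpdateAndMarkModUnion() above now tolerates immune spaces that have no mod-union table (app images) by scanning the space's live objects instead of failing a CHECK. A sketch of that dispatch with invented minimal interfaces:

#include <functional>

// Minimal stand-ins for the GC interfaces involved.
struct ModUnionTable {
  // Re-scan only the objects recorded as dirtied since the last GC (stubbed out here).
  void UpdateAndMarkReferences(const std::function<void(void*)>& mark) { (void)mark; }
};

struct ImmuneSpace {
  ModUnionTable* mod_union_table = nullptr;  // May be null for app image spaces.
  // Visit every live object in the space (stubbed out here).
  void VisitLiveObjects(const std::function<void(void*)>& visit) { (void)visit; }
};

void UpdateAndMarkImmuneSpace(ImmuneSpace* space, const std::function<void(void*)>& mark) {
  if (space->mod_union_table != nullptr) {
    space->mod_union_table->UpdateAndMarkReferences(mark);  // Card-bounded work.
  } else {
    space->VisitLiveObjects(mark);  // No table: fall back to a full live-bitmap scan.
  }
}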
obj->AssertReadBarrierPointer(); } - if (immune_region_.ContainsObject(obj)) { + if (immune_spaces_.IsInImmuneRegion(obj)) { DCHECK(IsMarked(obj) != nullptr); return false; } @@ -606,26 +631,6 @@ void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) { this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving)); } -class ScanObjectVisitor { - public: - explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE - : mark_sweep_(mark_sweep) {} - - void operator()(mirror::Object* obj) const - ALWAYS_INLINE - REQUIRES(Locks::heap_bitmap_lock_) - SHARED_REQUIRES(Locks::mutator_lock_) { - if (kCheckLocks) { - Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); - Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); - } - mark_sweep_->ScanObject(obj); - } - - private: - MarkSweep* const mark_sweep_; -}; - class DelayReferenceReferentVisitor { public: explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {} @@ -1193,7 +1198,8 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma std::vector<space::ContinuousSpace*> sweep_spaces; space::ContinuousSpace* non_moving_space = nullptr; for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) { - if (space->IsAllocSpace() && !immune_region_.ContainsSpace(space) && + if (space->IsAllocSpace() && + !immune_spaces_.ContainsSpace(space) && space->GetLiveBitmap() != nullptr) { if (space == heap_->GetNonMovingSpace()) { non_moving_space = space; @@ -1422,7 +1428,7 @@ void MarkSweep::ProcessMarkStack(bool paused) { } inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) { - if (immune_region_.ContainsObject(object)) { + if (immune_spaces_.IsInImmuneRegion(object)) { return object; } if (current_space_bitmap_->HasAddress(object)) { diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index 8f7df78d53..245f96bdb3 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -26,7 +26,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "object_callbacks.h" #include "offsets.h" @@ -314,8 +314,9 @@ class MarkSweep : public GarbageCollector { accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune range is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. Immune spaces that aren't in the + // immune region are handled by the normal marking logic. + ImmuneSpaces immune_spaces_; // Parallel finger. 
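The immune_spaces_ member above replaces the old single immune_region_ in each collector. The hot checks stay cheap: IsInImmuneRegion() remains an address-range test used from IsMarked() and MarkObjectNonNull(), while ContainsSpace() answers exact per-space membership for decisions such as ShouldSweepSpace() and the mod-union table updates. The sketch below only illustrates that split, under assumed names (SpaceRange, ImmuneSpacesSketch); the real art ImmuneSpaces class additionally widens the immune region only across spaces that are adjacent in memory, including their oat files.

  // Illustrative sketch (assumed names, not the ART implementation): a set of
  // never-collected spaces plus the address range they span, so the common
  // "is this object immune?" question stays a two-comparison fast path.
  #include <algorithm>
  #include <cstdint>
  #include <limits>
  #include <set>

  struct SpaceRange {            // Stand-in for space::ContinuousSpace.
    uintptr_t begin;
    uintptr_t end;               // One past the last byte of the space.
  };

  class ImmuneSpacesSketch {
   public:
    void Reset() {
      spaces_.clear();
      begin_ = std::numeric_limits<uintptr_t>::max();
      end_ = 0;
    }
    void AddSpace(const SpaceRange* space) {
      spaces_.insert(space);
      // The real class only widens the region over adjacent spaces (and their
      // oat files); this sketch simply tracks the overall bounds.
      begin_ = std::min(begin_, space->begin);
      end_ = std::max(end_, space->end);
    }
    bool ContainsSpace(const SpaceRange* space) const {
      return spaces_.count(space) != 0;
    }
    bool IsInImmuneRegion(const void* obj) const {
      const uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
      return addr >= begin_ && addr < end_;
    }
   private:
    std::set<const SpaceRange*> spaces_;
    uintptr_t begin_ = std::numeric_limits<uintptr_t>::max();
    uintptr_t end_ = 0;
  };

This is also why the UpdateAndMarkModUnion() hunk above tolerates an immune space without a mod-union table: an app image space may not have one, so its live bitmap is scanned instead.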
AtomicInteger atomic_finger_; diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc index 15f782aea8..984779484e 100644 --- a/runtime/gc/collector/partial_mark_sweep.cc +++ b/runtime/gc/collector/partial_mark_sweep.cc @@ -39,7 +39,7 @@ void PartialMarkSweep::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { CHECK(space->IsZygoteSpace()); - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h index 06d20f583a..12cf3dbf98 100644 --- a/runtime/gc/collector/semi_space-inl.h +++ b/runtime/gc/collector/semi_space-inl.h @@ -74,7 +74,7 @@ inline void SemiSpace::MarkObject( MarkStackPush(forward_address); } obj_ptr->Assign(forward_address); - } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) { + } else if (!collect_from_space_only_ && !immune_spaces_.IsInImmuneRegion(obj)) { BitmapSetSlowPathVisitor visitor(this); if (!mark_bitmap_->Set(obj, visitor)) { // This object was not previously marked. diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 7f57f30b27..e9497a2223 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -66,8 +66,9 @@ void SemiSpace::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } else if (space->GetLiveBitmap() != nullptr) { + // TODO: We can probably also add this space to the immune region. if (space == to_space_ || collect_from_space_only_) { if (collect_from_space_only_) { // Bind the bitmaps of the main free list space and the non-moving space we are doing a @@ -144,7 +145,7 @@ void SemiSpace::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); is_large_object_space_immune_ = false; saved_bytes_ = 0; bytes_moved_ = 0; @@ -376,7 +377,13 @@ void SemiSpace::MarkReachableObjects() { << "generational_=" << generational_ << " " << "collect_from_space_only_=" << collect_from_space_only_; accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space); - CHECK_EQ(rem_set != nullptr, kUseRememberedSet); + if (kUseRememberedSet) { + // App images currently do not have remembered sets. + DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) || + rem_set != nullptr); + } else { + DCHECK(rem_set == nullptr); + } if (rem_set != nullptr) { TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings()); rem_set->UpdateAndMarkReferences(from_space_, this); @@ -767,7 +774,8 @@ mirror::Object* SemiSpace::IsMarked(mirror::Object* obj) { if (from_space_->HasAddress(obj)) { // Returns either the forwarding address or null. 
return GetForwardingAddressInFromSpace(obj); - } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) || + } else if (collect_from_space_only_ || + immune_spaces_.IsInImmuneRegion(obj) || to_space_->HasAddress(obj)) { return obj; // Already forwarded, must be marked. } diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index b9246ca2fc..a905904115 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -25,7 +25,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "mirror/object_reference.h" #include "object_callbacks.h" #include "offsets.h" @@ -201,8 +201,8 @@ class SemiSpace : public GarbageCollector { // object. accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune region is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. + ImmuneSpaces immune_spaces_; // If true, the large object space is immune. bool is_large_object_space_immune_; diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index 77f606ddce..e754a52e7e 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -20,6 +20,8 @@ #include "gc/accounting/card_table.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/heap.h" +#include "jit/jit.h" +#include "jit/jit_code_cache.h" #include "memory_tool_malloc_space-inl.h" #include "mirror/class-inl.h" #include "mirror/object-inl.h" @@ -318,10 +320,17 @@ namespace allocator { // Implement the dlmalloc morecore callback. void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) { - Heap* heap = Runtime::Current()->GetHeap(); + Runtime* runtime = Runtime::Current(); + Heap* heap = runtime->GetHeap(); ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace(); // Support for multiple DlMalloc provided by a slow path. if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) { + if (LIKELY(runtime->GetJit() != nullptr)) { + jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache(); + if (code_cache->OwnsSpace(mspace)) { + return code_cache->MoreCore(mspace, increment); + } + } dlmalloc_space = nullptr; for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) { if (space->IsDlMallocSpace()) { diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 99207426a0..babd672cee 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -119,7 +119,22 @@ class ImageSpace : public MemMapSpace { bool* has_data, bool *is_global_cache); - private: + // Return the end of the image which includes non-heap objects such as ArtMethods and ArtFields. + uint8_t* GetImageEnd() const { + return Begin() + GetImageHeader().GetImageSize(); + } + + // Return the start of the associated oat file. + uint8_t* GetOatFileBegin() const { + return GetImageHeader().GetOatFileBegin(); + } + + // Return the end of the associated oat file. + uint8_t* GetOatFileEnd() const { + return GetImageHeader().GetOatFileEnd(); + } + + protected: // Tries to initialize an ImageSpace from the given image path, // returning null on error. 
// @@ -157,6 +172,7 @@ class ImageSpace : public MemMapSpace { const std::string image_location_; + private: DISALLOW_COPY_AND_ASSIGN(ImageSpace); }; diff --git a/runtime/image.h b/runtime/image.h index 20e4159b09..555cf5ddb7 100644 --- a/runtime/image.h +++ b/runtime/image.h @@ -84,7 +84,7 @@ class PACKED(4) ImageHeader { image_roots_(0U), pointer_size_(0U), compile_pic_(0) {} ImageHeader(uint32_t image_begin, - uint32_t image_size_, + uint32_t image_size, ImageSection* sections, uint32_t image_roots, uint32_t oat_checksum, @@ -93,7 +93,7 @@ class PACKED(4) ImageHeader { uint32_t oat_data_end, uint32_t oat_file_end, uint32_t pointer_size, - bool compile_pic_); + bool compile_pic); bool IsValid() const; const char* GetMagic() const; diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index f69115159f..ecbf13c4b1 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -34,8 +34,10 @@ namespace jit { JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) { auto* jit_options = new JitOptions; jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT); - jit_options->code_cache_capacity_ = - options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity); + jit_options->code_cache_initial_capacity_ = + options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity); + jit_options->code_cache_max_capacity_ = + options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity); jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold); jit_options->warmup_threshold_ = @@ -69,13 +71,15 @@ Jit* Jit::Create(JitOptions* options, std::string* error_msg) { if (!jit->LoadCompiler(error_msg)) { return nullptr; } - jit->code_cache_.reset(JitCodeCache::Create(options->GetCodeCacheCapacity(), error_msg)); + jit->code_cache_.reset(JitCodeCache::Create( + options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg)); if (jit->GetCodeCache() == nullptr) { return nullptr; } - LOG(INFO) << "JIT created with code_cache_capacity=" - << PrettySize(options->GetCodeCacheCapacity()) - << " compile_threshold=" << options->GetCompileThreshold(); + LOG(INFO) << "JIT created with initial_capacity=" + << PrettySize(options->GetCodeCacheInitialCapacity()) + << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity()) + << ", compile_threshold=" << options->GetCompileThreshold(); return jit.release(); } diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 1f89f9b1b7..fc76549013 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -102,8 +102,11 @@ class JitOptions { size_t GetWarmupThreshold() const { return warmup_threshold_; } - size_t GetCodeCacheCapacity() const { - return code_cache_capacity_; + size_t GetCodeCacheInitialCapacity() const { + return code_cache_initial_capacity_; + } + size_t GetCodeCacheMaxCapacity() const { + return code_cache_max_capacity_; } bool DumpJitInfoOnShutdown() const { return dump_info_on_shutdown_; @@ -117,13 +120,18 @@ class JitOptions { private: bool use_jit_; - size_t code_cache_capacity_; + size_t code_cache_initial_capacity_; + size_t code_cache_max_capacity_; size_t compile_threshold_; size_t warmup_threshold_; bool dump_info_on_shutdown_; - JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0), - dump_info_on_shutdown_(false) { } + JitOptions() + : use_jit_(false), + code_cache_initial_capacity_(0), + code_cache_max_capacity_(0), + compile_threshold_(0), + dump_info_on_shutdown_(false) { } 
DISALLOW_COPY_AND_ASSIGN(JitOptions); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index a291a09430..da79109b4f 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -44,73 +44,89 @@ static constexpr int kProtCode = PROT_READ | PROT_EXEC; } \ } while (false) \ -JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) { - CHECK_GT(capacity, 0U); - CHECK_LT(capacity, kMaxCapacity); +JitCodeCache* JitCodeCache::Create(size_t initial_capacity, + size_t max_capacity, + std::string* error_msg) { + CHECK_GE(max_capacity, initial_capacity); + // We need to have 32 bit offsets from method headers in code cache which point to things + // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. + // Ensure we're below 1 GB to be safe. + if (max_capacity > 1 * GB) { + std::ostringstream oss; + oss << "Maximum code cache capacity is limited to 1 GB, " + << PrettySize(max_capacity) << " is too big"; + *error_msg = oss.str(); + return nullptr; + } + std::string error_str; // Map name specific for android_os_Debug.cpp accounting. MemMap* data_map = MemMap::MapAnonymous( - "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str); + "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str); if (data_map == nullptr) { std::ostringstream oss; - oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; *error_msg = oss.str(); return nullptr; } + // Align both capacities to page size, as that's the unit mspaces use. + initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); + max_capacity = RoundDown(max_capacity, 2 * kPageSize); + // Data cache is 1 / 2 of the map. // TODO: Make this variable? - size_t data_size = RoundUp(data_map->Size() / 2, kPageSize); - size_t code_size = data_map->Size() - data_size; + size_t data_size = max_capacity / 2; + size_t code_size = max_capacity - data_size; + DCHECK_EQ(code_size + data_size, max_capacity); uint8_t* divider = data_map->Begin() + data_size; - // We need to have 32 bit offsets from method headers in code cache which point to things - // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str); if (code_map == nullptr) { std::ostringstream oss; - oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; *error_msg = oss.str(); return nullptr; } - DCHECK_EQ(code_map->Size(), code_size); DCHECK_EQ(code_map->Begin(), divider); - return new JitCodeCache(code_map, data_map); + data_size = initial_capacity / 2; + code_size = initial_capacity - data_size; + DCHECK_EQ(code_size + data_size, initial_capacity); + return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity); } -JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map) +JitCodeCache::JitCodeCache(MemMap* code_map, + MemMap* data_map, + size_t initial_code_capacity, + size_t initial_data_capacity, + size_t max_capacity) : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache variable", lock_), collection_in_progress_(false), code_map_(code_map), - data_map_(data_map) { + data_map_(data_map), + max_capacity_(max_capacity), + current_capacity_(initial_code_capacity + initial_data_capacity), + code_end_(initial_code_capacity), + data_end_(initial_data_capacity), + has_done_one_collection_(false) { - code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/); - data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/); + code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); + data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); if (code_mspace_ == nullptr || data_mspace_ == nullptr) { PLOG(FATAL) << "create_mspace_with_base failed"; } - // Prevent morecore requests from the mspace. 
- mspace_set_footprint_limit(code_mspace_, code_map_->Size()); - mspace_set_footprint_limit(data_mspace_, data_map_->Size()); + SetFootprintLimit(current_capacity_); CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode); CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData); - live_bitmap_.reset(CodeCacheBitmap::Create("code-cache-bitmap", - reinterpret_cast<uintptr_t>(code_map_->Begin()), - reinterpret_cast<uintptr_t>(code_map_->End()))); - - if (live_bitmap_.get() == nullptr) { - PLOG(FATAL) << "creating bitmaps for the JIT code cache failed"; - } - - VLOG(jit) << "Created jit code cache: data size=" - << PrettySize(data_map_->Size()) - << ", code size=" - << PrettySize(code_map_->Size()); + VLOG(jit) << "Created jit code cache: initial data size=" + << PrettySize(initial_data_capacity) + << ", initial code size=" + << PrettySize(initial_code_capacity); } bool JitCodeCache::ContainsPc(const void* ptr) const { @@ -433,13 +449,48 @@ class MarkCodeClosure FINAL : public Closure { Barrier* const barrier_; }; -void JitCodeCache::GarbageCollectCache(Thread* self) { +void JitCodeCache::NotifyCollectionDone(Thread* self) { + collection_in_progress_ = false; + lock_cond_.Broadcast(self); +} + +void JitCodeCache::SetFootprintLimit(size_t new_footprint) { + size_t per_space_footprint = new_footprint / 2; + DCHECK(IsAlignedParam(per_space_footprint, kPageSize)); + DCHECK_EQ(per_space_footprint * 2, new_footprint); + mspace_set_footprint_limit(data_mspace_, per_space_footprint); + { + ScopedCodeCacheWrite scc(code_map_.get()); + mspace_set_footprint_limit(code_mspace_, per_space_footprint); + } +} + +bool JitCodeCache::IncreaseCodeCacheCapacity() { + if (current_capacity_ == max_capacity_) { + return false; + } + + // Double the capacity if we're below 1MB, or increase it by 1MB if + // we're above. + if (current_capacity_ < 1 * MB) { + current_capacity_ *= 2; + } else { + current_capacity_ += 1 * MB; + } + if (current_capacity_ > max_capacity_) { + current_capacity_ = max_capacity_; + } + if (!kIsDebugBuild || VLOG_IS_ON(jit)) { - LOG(INFO) << "Clearing code cache, code=" - << PrettySize(CodeCacheSize()) - << ", data=" << PrettySize(DataCacheSize()); + LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_); } + SetFootprintLimit(current_capacity_); + + return true; +} + +void JitCodeCache::GarbageCollectCache(Thread* self) { instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); // Wait for an existing collection, or let everyone know we are starting one. @@ -452,6 +503,28 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { collection_in_progress_ = true; } } + + // Check if we just need to grow the capacity. If we don't, allocate the bitmap while + // we hold the lock. + { + MutexLock mu(self, lock_); + if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { + has_done_one_collection_ = false; + NotifyCollectionDone(self); + return; + } else { + live_bitmap_.reset(CodeCacheBitmap::Create( + "code-cache-bitmap", + reinterpret_cast<uintptr_t>(code_map_->Begin()), + reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2))); + } + } + + if (!kIsDebugBuild || VLOG_IS_ON(jit)) { + LOG(INFO) << "Clearing code cache, code=" + << PrettySize(CodeCacheSize()) + << ", data=" << PrettySize(DataCacheSize()); + } // Walk over all compiled methods and set the entry points of these // methods to interpreter. 
{ @@ -500,7 +573,6 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { } } } - GetLiveBitmap()->Bitmap::Clear(); // Free all profiling info. for (ProfilingInfo* info : profiling_infos_) { @@ -509,8 +581,9 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { } profiling_infos_.clear(); - collection_in_progress_ = false; - lock_cond_.Broadcast(self); + live_bitmap_.reset(nullptr); + has_done_one_collection_ = true; + NotifyCollectionDone(self); } if (!kIsDebugBuild || VLOG_IS_ON(jit)) { @@ -589,5 +662,20 @@ ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self, return info; } +// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock +// is already held. +void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS { + if (code_mspace_ == mspace) { + size_t result = code_end_; + code_end_ += increment; + return reinterpret_cast<void*>(result + code_map_->Begin()); + } else { + DCHECK_EQ(data_mspace_, mspace); + size_t result = data_end_; + data_end_ += increment; + return reinterpret_cast<void*>(result + data_map_->Begin()); + } +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 131446c484..13481e0e67 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -41,20 +41,20 @@ namespace jit { class JitInstrumentationCache; -// Alignment that will suit all architectures. +// Alignment in bits that will suit all architectures. static constexpr int kJitCodeAlignment = 16; using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>; class JitCodeCache { public: - static constexpr size_t kMaxCapacity = 1 * GB; + static constexpr size_t kMaxCapacity = 64 * MB; // Put the default to a very low amount for debug builds to stress the code cache // collection. - static constexpr size_t kDefaultCapacity = kIsDebugBuild ? 20 * KB : 2 * MB; + static constexpr size_t kInitialCapacity = kIsDebugBuild ? 16 * KB : 64 * KB; // Create the code cache with a code + data capacity equal to "capacity", error message is passed // in the out arg error_msg. - static JitCodeCache* Create(size_t capacity, std::string* error_msg); + static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg); // Number of bytes allocated in the code cache. size_t CodeCacheSize() REQUIRES(!lock_); @@ -133,9 +133,19 @@ class JitCodeCache { REQUIRES(!lock_) SHARED_REQUIRES(Locks::mutator_lock_); + bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS { + return mspace == code_mspace_ || mspace == data_mspace_; + } + + void* MoreCore(const void* mspace, intptr_t increment); + private: - // Take ownership of code_mem_map. - JitCodeCache(MemMap* code_map, MemMap* data_map); + // Take ownership of maps. + JitCodeCache(MemMap* code_map, + MemMap* data_map, + size_t initial_code_capacity, + size_t initial_data_capacity, + size_t max_capacity); // Internal version of 'CommitCode' that will not retry if the // allocation fails. Return null if the allocation fails. @@ -172,6 +182,16 @@ class JitCodeCache { // Number of bytes allocated in the data cache. size_t DataCacheSizeLocked() REQUIRES(lock_); + // Notify all waiting threads that a collection is done. + void NotifyCollectionDone(Thread* self) REQUIRES(lock_); + + // Try to increase the current capacity of the code cache. Return whether we + // succeeded at doing so. 
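Taken together, these jit_code_cache changes reserve address space for the maximum capacity up front but only let the mspaces use the initial capacity: SetFootprintLimit() caps each mspace at half of current_capacity_, and morecore requests from dlmalloc are routed back through OwnsSpace()/MoreCore(), which simply bump code_end_ and data_end_ inside the already-reserved maps. When a collection is requested, the cache first grows instead of collecting if it has already collected once at the current size, doubling while below 1 MB and then adding 1 MB per step, clamped to the maximum. A small stand-alone sketch of that growth schedule (illustrative only; the constants mirror the release-build defaults kInitialCapacity = 64 KB and kMaxCapacity = 64 MB shown in jit_code_cache.h):

  // Sketch of the growth policy only, not the ART code: double below 1 MB,
  // then grow by 1 MB per step, never exceeding the configured maximum.
  #include <cstddef>
  #include <cstdio>

  constexpr size_t KB = 1024;
  constexpr size_t MB = KB * KB;

  size_t NextCapacity(size_t current, size_t max) {
    if (current == max) {
      return current;              // At the ceiling; a real collection runs instead.
    }
    size_t next = (current < 1 * MB) ? current * 2 : current + 1 * MB;
    return next > max ? max : next;
  }

  int main() {
    size_t capacity = 64 * KB;     // Mirrors kInitialCapacity (release builds).
    const size_t max = 64 * MB;    // Mirrors kMaxCapacity.
    while (true) {
      std::printf("%zu KB\n", capacity / KB);
      size_t next = NextCapacity(capacity, max);
      if (next == capacity) break; // 64 KB, 128 KB, ..., 1 MB, 2 MB, ..., 64 MB.
      capacity = next;
    }
    return 0;
  }

With those defaults the schedule is 64 KB, 128 KB, 256 KB, 512 KB, 1 MB, 2 MB, and so on up to 64 MB, with at least one full code cache collection between consecutive growth steps.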
+ bool IncreaseCodeCacheCapacity() REQUIRES(lock_); + + // Set the footprint limit of the code cache. + void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_); + // Lock for guarding allocations, collections, and the method_code_map_. Mutex lock_; // Condition to wait on during collection. @@ -193,6 +213,21 @@ class JitCodeCache { // ProfilingInfo objects we have allocated. std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_); + // The maximum capacity in bytes this code cache can go to. + size_t max_capacity_ GUARDED_BY(lock_); + + // The current capacity in bytes of the code cache. + size_t current_capacity_ GUARDED_BY(lock_); + + // The current footprint in bytes of the code portion of the code cache. + size_t code_end_ GUARDED_BY(lock_); + + // The current footprint in bytes of the data portion of the code cache. + size_t data_end_ GUARDED_BY(lock_); + + // Whether a collection has already been done on the current capacity. + bool has_done_one_collection_ GUARDED_BY(lock_); + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc index 8eef10bbad..9918bb71f3 100644 --- a/runtime/lambda/box_table.cc +++ b/runtime/lambda/box_table.cc @@ -62,7 +62,7 @@ BoxTable::BoxTable() BoxTable::~BoxTable() { // Free all the copies of our closures. - for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) { + for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) { std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; Closure* closure = key_value_pair.first; diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 5c12091ecb..460342807a 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -163,6 +163,7 @@ inline void Object::SetReadBarrierPointer(Object* rb_ptr) { #endif } +template<bool kCasRelease> inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) { #ifdef USE_BAKER_READ_BARRIER DCHECK(kUseBakerReadBarrier); @@ -181,10 +182,13 @@ inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr))); new_lw = lw; new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr))); - // This CAS is a CAS release so that when GC updates all the fields of an object and then - // changes the object from gray to black, the field updates (stores) will be visible (won't be - // reordered after this CAS.) - } while (!CasLockWordWeakRelease(expected_lw, new_lw)); + // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true. + // If kCasRelease == true, use a CAS release so that when GC updates all the fields of + // an object and then changes the object from gray to black, the field updates (stores) will be + // visible (won't be reordered after this CAS.) + } while (!(kCasRelease ? 
+ CasLockWordWeakRelease(expected_lw, new_lw) : + CasLockWordWeakRelaxed(expected_lw, new_lw))); return true; #elif USE_BROOKS_READ_BARRIER DCHECK(kUseBrooksReadBarrier); diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index 5c6520fcab..71e704e704 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -92,13 +92,13 @@ class MANAGED LOCKABLE Object { void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_); Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_); + #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER NO_RETURN #endif void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_); -#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER - NO_RETURN -#endif + + template<bool kCasRelease = false> ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_); void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index ae16c7f373..dfd783b988 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -152,9 +152,12 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .WithType<bool>() .WithValueMap({{"false", false}, {"true", true}}) .IntoKey(M::UseJIT) - .Define("-Xjitcodecachesize:_") + .Define("-Xjitinitialsize:_") .WithType<MemoryKiB>() - .IntoKey(M::JITCodeCacheCapacity) + .IntoKey(M::JITCodeCacheInitialCapacity) + .Define("-Xjitmaxsize:_") + .WithType<MemoryKiB>() + .IntoKey(M::JITCodeCacheMaxCapacity) .Define("-Xjitthreshold:_") .WithType<unsigned int>() .IntoKey(M::JITCompileThreshold) @@ -640,7 +643,6 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -XX:ForegroundHeapGrowthMultiplier=doublevalue\n"); UsageMessage(stream, " -XX:LowMemoryMode\n"); UsageMessage(stream, " -Xprofile:{threadcpuclock,wallclock,dualclock}\n"); - UsageMessage(stream, " -Xjitcodecachesize:N\n"); UsageMessage(stream, " -Xjitthreshold:integervalue\n"); UsageMessage(stream, "\n"); @@ -684,6 +686,8 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -Ximage-compiler-option dex2oat-option\n"); UsageMessage(stream, " -Xpatchoat:filename\n"); UsageMessage(stream, " -Xusejit:booleanvalue\n"); + UsageMessage(stream, " -Xjitinitialsize:N\n"); + UsageMessage(stream, " -Xjitmaxsize:N\n"); UsageMessage(stream, " -X[no]relocate\n"); UsageMessage(stream, " -X[no]dex2oat (Whether to invoke dex2oat on the application)\n"); UsageMessage(stream, " -X[no]image-dex2oat (Whether to create and use a boot image)\n"); @@ -718,6 +722,7 @@ void ParsedOptions::Usage(const char* fmt, ...) 
{ UsageMessage(stream, " -Xjitblocking\n"); UsageMessage(stream, " -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n"); UsageMessage(stream, " -Xjitclass:classname[,classname]*\n"); + UsageMessage(stream, " -Xjitcodecachesize:N\n"); UsageMessage(stream, " -Xjitoffset:offset[,offset]\n"); UsageMessage(stream, " -Xjitconfig:filename\n"); UsageMessage(stream, " -Xjitcheckcg\n"); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 0077389801..a210aa8c16 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1044,8 +1044,13 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) class_linker_ = new ClassLinker(intern_table_); if (GetHeap()->HasImageSpace()) { ATRACE_BEGIN("InitFromImage"); - class_linker_->InitFromImage(); + std::string error_msg; + bool result = class_linker_->InitFromImage(&error_msg); ATRACE_END(); + if (!result) { + LOG(ERROR) << "Could not initialize from image: " << error_msg; + return false; + } if (kIsDebugBuild) { GetHeap()->GetBootImageSpace()->VerifyImageAllocations(); } @@ -1077,7 +1082,11 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) runtime_options.GetOrDefault(Opt::Image), &boot_class_path); instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet); - class_linker_->InitWithoutImage(std::move(boot_class_path)); + std::string error_msg; + if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) { + LOG(ERROR) << "Could not initialize without image: " << error_msg; + return false; + } // TODO: Should we move the following to InitWithoutImage? SetInstructionSet(instruction_set_); diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 3489834f30..9051eda0df 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -69,7 +69,8 @@ RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true) RUNTIME_OPTIONS_KEY (bool, UseJIT, false) RUNTIME_OPTIONS_KEY (unsigned int, JITCompileThreshold, jit::Jit::kDefaultCompileThreshold) RUNTIME_OPTIONS_KEY (unsigned int, JITWarmupThreshold, jit::Jit::kDefaultWarmupThreshold) -RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheCapacity, jit::JitCodeCache::kDefaultCapacity) +RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheInitialCapacity, jit::JitCodeCache::kInitialCapacity) +RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheMaxCapacity, jit::JitCodeCache::kMaxCapacity) RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ HSpaceCompactForOOMMinIntervalsMs,\ MsToNs(100 * 1000)) // 100s diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index b09b87fb58..a390908635 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -948,7 +948,12 @@ void ThreadList::SuspendAllForDebugger() { Locks::mutator_lock_->ExclusiveLock(self); Locks::mutator_lock_->ExclusiveUnlock(self); #endif - AssertThreadsAreSuspended(self, self, debug_thread); + // Disabled for the following race condition: + // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock. + // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected). + // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable + // state (from SetStateUnsafe). 
+ // AssertThreadsAreSuspended(self, self, debug_thread); VLOG(threads) << *self << " SuspendAllForDebugger complete"; } diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index c32d34aa6f..d5fed2adfe 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -1226,6 +1226,46 @@ public class Main { return arg / -0.25f; } + /** + * Test strength reduction of factors of the form (2^n + 1). + */ + + /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const9:i\d+>> IntConstant 9 + /// CHECK: Mul [<<Arg>>,<<Const9>>] + + /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const3:i\d+>> IntConstant 3 + /// CHECK: <<Shift:i\d+>> Shl [<<Arg>>,<<Const3>>] + /// CHECK-NEXT: Add [<<Arg>>,<<Shift>>] + + public static int mulPow2Plus1(int arg) { + return arg * 9; + } + + + /** + * Test strength reduction of factors of the form (2^n - 1). + */ + + /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const31:j\d+>> LongConstant 31 + /// CHECK: Mul [<<Arg>>,<<Const31>>] + + /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK: <<Shift:j\d+>> Shl [<<Arg>>,<<Const5>>] + /// CHECK-NEXT: Sub [<<Shift>>,<<Arg>>] + + public static long mulPow2Minus1(long arg) { + return arg * 31; + } + + public static void main(String[] args) { int arg = 123456; @@ -1283,5 +1323,15 @@ public class Main { assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L); assertIntEquals(Shr24And127(0xc1234567), 0x41); assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L); + assertIntEquals(0, mulPow2Plus1(0)); + assertIntEquals(9, mulPow2Plus1(1)); + assertIntEquals(18, mulPow2Plus1(2)); + assertIntEquals(900, mulPow2Plus1(100)); + assertIntEquals(111105, mulPow2Plus1(12345)); + assertLongEquals(0, mulPow2Minus1(0)); + assertLongEquals(31, mulPow2Minus1(1)); + assertLongEquals(62, mulPow2Minus1(2)); + assertLongEquals(3100, mulPow2Minus1(100)); + assertLongEquals(382695, mulPow2Minus1(12345)); } } diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt index 387e1a7cb1..7de097f666 100644 --- a/test/478-checker-clinit-check-pruning/expected.txt +++ b/test/478-checker-clinit-check-pruning/expected.txt @@ -4,3 +4,9 @@ Main$ClassWithClinit3's static initializer Main$ClassWithClinit4's static initializer Main$ClassWithClinit5's static initializer Main$ClassWithClinit6's static initializer +Main$ClassWithClinit7's static initializer +Main$ClassWithClinit8's static initializer +Main$ClassWithClinit9's static initializer +Main$ClassWithClinit10's static initializer +Main$ClassWithClinit11's static initializer +Main$ClassWithClinit12's static initializer diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java index cff627373d..79935134b4 100644 --- a/test/478-checker-clinit-check-pruning/src/Main.java +++ b/test/478-checker-clinit-check-pruning/src/Main.java @@ -83,7 +83,7 @@ public class Main { // before the next pass (liveness analysis) instead. 
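The new 458-checker-instruction-simplification tests above pin down strength reduction of multiplications by constants of the form 2^n + 1 and 2^n - 1 into a shift plus an add or a subtract. The identity itself, spelled out as plain C++ (an illustration of the transformation, not compiler code):

  #include <cassert>
  #include <cstdint>

  // x * 9  == x * (2^3 + 1) == (x << 3) + x
  int32_t MulPow2Plus1(int32_t x) { return (x << 3) + x; }

  // x * 31 == x * (2^5 - 1) == (x << 5) - x
  int64_t MulPow2Minus1(int64_t x) { return (x << 5) - x; }

  int main() {
    assert(MulPow2Plus1(12345) == 12345 * 9);      // 111105, matching the test's expectations.
    assert(MulPow2Minus1(12345) == 12345LL * 31);  // 382695
    return 0;
  }

This new simplification also appears to be why the 485-checker-dce-loop-update smali test below switches its multiplier from 9 to 11: 11 is neither 2^n + 1 nor 2^n - 1, so the Mul that test expects is left intact.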
/// CHECK-START: void Main.invokeStaticNotInlined() liveness (before) - /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeStaticOrDirect clinit_check:implicit /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before) /// CHECK-NOT: LoadClass @@ -269,7 +269,7 @@ public class Main { /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before) /// CHECK-DAG: <<IntConstant:i\d+>> IntConstant 0 /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false - /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InvokeStaticOrDirect clinit_check:implicit /// CHECK-DAG: StaticFieldSet [<<LoadClass>>,<<IntConstant>>] /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before) @@ -289,7 +289,7 @@ public class Main { /// CHECK-DAG: <<IntConstant:i\d+>> IntConstant 0 /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:true /// CHECK-DAG: StaticFieldSet [<<LoadClass>>,<<IntConstant>>] - /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InvokeStaticOrDirect clinit_check:none /// CHECK-START: void Main.clinitBecauseOfFieldAccess() liveness (before) /// CHECK-NOT: ClinitCheck @@ -298,6 +298,206 @@ public class Main { ClassWithClinit2.$noinline$staticMethod(); } + /* + * Verify that LoadClass from const-class is not merged with + * later invoke-static (or its ClinitCheck). + */ + + /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:false + /// CHECK: InvokeStaticOrDirect clinit_check:implicit + + /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void constClassAndInvokeStatic(Iterable it) { + $opt$inline$ignoreClass(ClassWithClinit7.class); + ClassWithClinit7.someStaticMethod(it); + } + + static void $opt$inline$ignoreClass(Class c) { + } + + static class ClassWithClinit7 { + static { + System.out.println("Main$ClassWithClinit7's static initializer"); + } + + // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from sget is not merged with later invoke-static. + */ + + /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void sgetAndInvokeStatic(Iterable it) { + $opt$inline$ignoreInt(ClassWithClinit8.value); + ClassWithClinit8.someStaticMethod(it); + } + + static void $opt$inline$ignoreInt(int i) { + } + + static class ClassWithClinit8 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit8's static initializer"); + } + + // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from const-class, ClinitCheck from sget and + * InvokeStaticOrDirect from invoke-static are not merged.
+ */ + + /// CHECK-START: void Main.constClassSgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:false + /// CHECK: ClinitCheck + /// CHECK: InvokeStaticOrDirect clinit_check:none + + static void constClassSgetAndInvokeStatic(Iterable it) { + $opt$inline$ignoreClass(ClassWithClinit9.class); + $opt$inline$ignoreInt(ClassWithClinit9.value); + ClassWithClinit9.someStaticMethod(it); + } + + static class ClassWithClinit9 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit9's static initializer"); + } + + // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from a fully-inlined invoke-static is not merged + * with InvokeStaticOrDirect from a later invoke-static to the same method. + */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaNonStatic(Iterable it) { + inlinedInvokeStaticViaNonStaticHelper(null); + inlinedInvokeStaticViaNonStaticHelper(it); + } + + static void inlinedInvokeStaticViaNonStaticHelper(Iterable it) { + ClassWithClinit10.inlinedForNull(it); + } + + static class ClassWithClinit10 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit10's static initializer"); + } + + static void inlinedForNull(Iterable it) { + if (it != null) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + } + + /* + * Check that the LoadClass from an invoke-static C.foo() doesn't get merged with + * an invoke-static inside C.foo(). This would mess up the stack walk in the + * resolution trampoline where we would have to load C (if C isn't loaded yet) + * which is not permitted there. + * + * Note: In case of failure, we would get a failed assertion during compilation, + * so we wouldn't really get to the checker tests below. + */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaStatic(Iterable it) { + ClassWithClinit11.callInlinedForNull(it); + } + + static class ClassWithClinit11 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit11's static initializer"); + } + + static void callInlinedForNull(Iterable it) { + inlinedForNull(it); + } + + static void inlinedForNull(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * A test similar to inlinedInvokeStaticViaStatic() but doing the indirect invoke + * twice with the first one to be fully inlined.
+ */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaStaticTwice(Iterable it) { + ClassWithClinit12.callInlinedForNull(null); + ClassWithClinit12.callInlinedForNull(it); + } + + static class ClassWithClinit12 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit12's static initializer"); + } + + static void callInlinedForNull(Iterable it) { + inlinedForNull(it); + } + + static void inlinedForNull(Iterable it) { + if (it != null) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + } + // TODO: Add a test for the case of a static method whose declaring // class type index is not available (i.e. when `storage_index` // equals `DexFile::kDexNoIndex` in @@ -310,5 +510,12 @@ public class Main { ClassWithClinit4.invokeStaticNotInlined(); SubClassOfClassWithClinit5.invokeStaticInlined(); SubClassOfClassWithClinit6.invokeStaticNotInlined(); + Iterable it = new Iterable() { public java.util.Iterator iterator() { return null; } }; + constClassAndInvokeStatic(it); + sgetAndInvokeStatic(it); + constClassSgetAndInvokeStatic(it); + inlinedInvokeStaticViaNonStatic(it); + inlinedInvokeStaticViaStatic(it); + inlinedInvokeStaticViaStaticTwice(it); } } diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali index ab4afdb547..1de0baeabd 100644 --- a/test/485-checker-dce-loop-update/smali/TestCase.smali +++ b/test/485-checker-dce-loop-update/smali/TestCase.smali @@ -136,11 +136,11 @@ ## CHECK-DAG: <<Cst1:i\d+>> IntConstant 1 ## CHECK-DAG: <<Cst5:i\d+>> IntConstant 5 ## CHECK-DAG: <<Cst7:i\d+>> IntConstant 7 -## CHECK-DAG: <<Cst9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Cst11:i\d+>> IntConstant 11 ## CHECK-DAG: <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>> ## CHECK-DAG: If [<<ArgY>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:<<HeaderY>> -## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:<<HeaderY>> +## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst11>>] loop:<<HeaderY>> ## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<Cst1>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add5>> Add [<<PhiX2>>,<<Cst5>>] loop:<<HeaderY>> @@ -152,12 +152,12 @@ ## CHECK-DAG: <<ArgY:z\d+>> ParameterValue ## CHECK-DAG: <<ArgZ:z\d+>> ParameterValue ## CHECK-DAG: <<Cst7:i\d+>> IntConstant 7 -## CHECK-DAG: <<Cst9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Cst11:i\d+>> IntConstant 11 ## CHECK-DAG: <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>> ## CHECK-DAG: If [<<ArgY>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add7>> Add [<<PhiX1>>,<<Cst7>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:none -## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:none +## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst11>>] loop:none ## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:none ## CHECK-DAG: Return [<<PhiX2>>] loop:none @@ -177,7 +177,7 @@ # Additional logic which will end up outside the loop if-eqz p2, :skip_if - mul-int/lit8 p0, p0, 9 + mul-int/lit8 p0, p0, 11 :skip_if if-nez v0, :loop_end # will always take the branch diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java 
index c766aaa6c9..13c4722bc4 100644 --- a/test/530-checker-lse/src/Main.java +++ b/test/530-checker-lse/src/Main.java @@ -22,7 +22,7 @@ class Circle { return radius * radius * Math.PI; } private double radius; -}; +} class TestClass { TestClass() { @@ -35,17 +35,31 @@ class TestClass { int j; volatile int k; TestClass next; + String str; static int si; -}; +} class SubTestClass extends TestClass { int k; -}; +} class TestClass2 { int i; int j; -}; +} + +class Finalizable { + static boolean sVisited = false; + static final int VALUE = 0xbeef; + int i; + + protected void finalize() { + if (i != VALUE) { + System.out.println("Where is the beef?"); + } + sVisited = true; + } +} public class Main { @@ -56,7 +70,7 @@ public class Main { /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after) /// CHECK: NewInstance - /// CHECK: InstanceFieldSet + /// CHECK-NOT: InstanceFieldSet /// CHECK-NOT: InstanceFieldGet static double calcCircleArea(double radius) { @@ -117,7 +131,7 @@ public class Main { /// CHECK: InstanceFieldGet /// CHECK: InstanceFieldSet /// CHECK: NewInstance - /// CHECK: InstanceFieldSet + /// CHECK-NOT: InstanceFieldSet /// CHECK-NOT: InstanceFieldGet // A new allocation shouldn't alias with pre-existing values. @@ -223,7 +237,7 @@ public class Main { /// CHECK-START: int Main.test8() load_store_elimination (after) /// CHECK: NewInstance - /// CHECK: InstanceFieldSet + /// CHECK-NOT: InstanceFieldSet /// CHECK: InvokeVirtual /// CHECK-NOT: NullCheck /// CHECK-NOT: InstanceFieldGet @@ -381,8 +395,8 @@ public class Main { /// CHECK-START: int Main.test16() load_store_elimination (after) /// CHECK: NewInstance - /// CHECK-NOT: StaticFieldSet - /// CHECK-NOT: StaticFieldGet + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: InstanceFieldGet // Test inlined constructor. static int test16() { @@ -398,8 +412,8 @@ public class Main { /// CHECK-START: int Main.test17() load_store_elimination (after) /// CHECK: <<Const0:i\d+>> IntConstant 0 /// CHECK: NewInstance - /// CHECK-NOT: StaticFieldSet - /// CHECK-NOT: StaticFieldGet + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: InstanceFieldGet /// CHECK: Return [<<Const0>>] // Test getting default value. @@ -455,6 +469,148 @@ public class Main { return obj; } + /// CHECK-START: void Main.test21() load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: StaticFieldSet + /// CHECK: StaticFieldGet + + /// CHECK-START: void Main.test21() load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: StaticFieldSet + /// CHECK: InstanceFieldGet + + // Loop side effects can kill heap values, stores need to be kept in that case. + static void test21() { + TestClass obj = new TestClass(); + obj.str = "abc"; + for (int i = 0; i < 2; i++) { + // Generate some loop side effect that does write. 
+ obj.si = 1; + } + System.out.print(obj.str.substring(0, 0)); + } + + /// CHECK-START: int Main.test22() load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldGet + + /// CHECK-START: int Main.test22() load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: InstanceFieldGet + /// CHECK: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK-NOT: InstanceFieldGet + + // Loop side effects only affect stores into singletons that dominate the loop header. + static int test22() { + int sum = 0; + TestClass obj1 = new TestClass(); + obj1.i = 2; // This store can't be eliminated since it can be killed by loop side effects. + for (int i = 0; i < 2; i++) { + TestClass obj2 = new TestClass(); + obj2.i = 3; // This store can be eliminated since the singleton is inside the loop. + sum += obj2.i; + } + TestClass obj3 = new TestClass(); + obj3.i = 5; // This store can be eliminated since the singleton is created after the loop. + sum += obj1.i + obj3.i; + return sum; + } + + /// CHECK-START: int Main.test23(boolean) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK: Return + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldSet + + /// CHECK-START: int Main.test23(boolean) load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: InstanceFieldGet + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldGet + /// CHECK: Return + /// CHECK-NOT: InstanceFieldGet + /// CHECK: InstanceFieldSet + + // Test store elimination on merging. + static int test23(boolean b) { + TestClass obj = new TestClass(); + obj.i = 3; // This store can be eliminated since the value flows into each branch. + if (b) { + obj.i += 1; // This store cannot be eliminated due to the merge later. + } else { + obj.i += 2; // This store cannot be eliminated due to the merge later. + } + return obj.i; + } + + /// CHECK-START: void Main.testFinalizable() load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + + /// CHECK-START: void Main.testFinalizable() load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + + // Allocations and stores into finalizable objects cannot be eliminated. + static void testFinalizable() { + Finalizable finalizable = new Finalizable(); + finalizable.i = Finalizable.VALUE; + } + + static java.lang.ref.WeakReference<Object> getWeakReference() { + return new java.lang.ref.WeakReference<>(new Object()); + } + + static void testFinalizableByForcingGc() { + testFinalizable(); + java.lang.ref.WeakReference<Object> reference = getWeakReference(); + + Runtime runtime = Runtime.getRuntime(); + for (int i = 0; i < 20; ++i) { + runtime.gc(); + System.runFinalization(); + try { + Thread.sleep(1); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + + // Check to see if the weak reference has been garbage collected. + if (reference.get() == null) { + // A little bit more sleep time to make sure.
+ try { + Thread.sleep(100); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + if (!Finalizable.sVisited) { + System.out.println("finalize() not called."); + } + return; + } + } + System.out.println("testFinalizableByForcingGc() failed to force gc."); + } + public static void assertIntEquals(int expected, int result) { if (expected != result) { throw new Error("Expected: " + expected + ", found: " + result); @@ -508,5 +664,10 @@ public class Main { float[] fa2 = { 1.8f }; assertFloatEquals(test19(fa1, fa2), 1.8f); assertFloatEquals(test20().i, 0); + test21(); + assertIntEquals(test22(), 13); + assertIntEquals(test23(true), 4); + assertIntEquals(test23(false), 5); + testFinalizableByForcingGc(); } } diff --git a/test/run-test b/test/run-test index 10ec3103b9..d0da34e78c 100755 --- a/test/run-test +++ b/test/run-test @@ -669,9 +669,9 @@ export TEST_NAME=`basename ${test_dir}` # ------------------------------- # Return whether the Optimizing compiler has read barrier support for ARCH. function arch_supports_read_barrier() { - # Optimizing has read barrier support for x86 and x86-64 at the + # Optimizing has read barrier support for ARM, x86 and x86-64 at the # moment. - [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] + [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] } # Tests named '<number>-checker-*' will also have their CFGs verified with diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index 047c24f8aa..02787fba43 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -21,7 +21,7 @@ fi out_dir=${OUT_DIR-out} java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES -common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar" +common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar ${out_dir}/host/linux-x86/bin/jack" mode="target" j_arg="-j$(nproc)" showcommands= |
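The 530-checker-lse additions above encode two constraints on load-store elimination: stores that a loop's side effects might clobber or observe must be kept, and stores into finalizable objects can never be removed because finalize() may read them after the method has returned. A loose C++ analogue of the second rule, with a destructor standing in for the finalizer (only an illustration of why the store stays observable, not how the ART pass works):

  #include <cstdio>

  struct Finalizable {
    int i = 0;
    ~Finalizable() {             // Plays the role of Java's finalize().
      if (i != 0xbeef) {
        std::puts("Where is the beef?");
      }
    }
  };

  void Test() {
    Finalizable f;
    f.i = 0xbeef;  // Looks unused afterwards, but the destructor observes it,
                   // so the store is not dead.
  }

  int main() {
    Test();
    return 0;
  }

In ART the finalizer runs later and out of line, so the LSE pass cannot reason about it at all; the new testFinalizable() check simply pins down that such allocations and stores survive the pass.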