Diffstat (limited to 'compiler/optimizing')
42 files changed, 1919 insertions, 833 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6e851bf1ba..12aa15207c 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -146,6 +146,13 @@ uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) { : mirror::Array::LengthOffset().Uint32Value(); } +uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) { + DCHECK(array_get->GetType() == Primitive::kPrimChar || !array_get->IsStringCharAt()); + return array_get->IsStringCharAt() + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(Primitive::ComponentSize(array_get->GetType())).Uint32Value(); +} + bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { DCHECK_EQ((*block_order_)[current_block_index_], current); return GetNextBlockToEmit() == FirstNonEmptyBlock(next); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 82a54d2ed1..9364be35ff 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -345,6 +345,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // accessing the String's `count` field in String intrinsics. static uint32_t GetArrayLengthOffset(HArrayLength* array_length); + // Helper that returns the offset of the array's data. + // Note: Besides the normal arrays, we also use the HArrayGet for + // accessing the String's `value` field in String intrinsics. + static uint32_t GetArrayDataOffset(HArrayGet* array_get); + void EmitParallelMoves(Location from1, Location to1, Primitive::Type type1, @@ -449,10 +454,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { SlowPathCode* slow_path) = 0; // Check if the desired_string_load_kind is supported. If it is, return it, - // otherwise return a fall-back info that should be used instead. + // otherwise return a fall-back kind that should be used instead. virtual HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) = 0; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + virtual HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) = 0; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( @@ -496,6 +506,20 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { LabelType label; }; + // Type patch info used for recording locations of required linker patches and + // target types. The actual type address can be absolute or PC-relative. + // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these + // classes contain the dex file, some index and the label. 
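The GetSupportedLoadClassKind() hook declared above mirrors the existing string hook: the sharpening pass proposes a load kind and the backend either accepts it or answers with a fall-back. A minimal sketch of that handshake, with a hypothetical caller (the sharpening-pass side is not part of this diff):

    HLoadClass::LoadKind desired = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
    HLoadClass::LoadKind actual  = codegen->GetSupportedLoadClassKind(desired);
    // `actual` may be a fall-back such as kDexCacheViaMethod; the MIPS
    // implementation further down returns that kind unconditionally, and the
    // ARM backend falls back to it for irreducible loops and (for now) when
    // read barriers are enabled. The caller then records `actual` on the
    // HLoadClass before code generation.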
+ template <typename LabelType> + struct TypePatchInfo { + TypePatchInfo(const DexFile& df, uint32_t index) + : dex_file(df), type_index(index), label() { } + + const DexFile& dex_file; + uint32_t type_index; + LabelType label; + }; + CodeGenerator(HGraph* graph, size_t number_of_core_registers, size_t number_of_fpu_registers, diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 4fc3b5434b..663c68a17b 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -176,8 +176,11 @@ class BoundsCheckSlowPathARM : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - arm_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -427,7 +430,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -490,8 +495,12 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -504,7 +513,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = index_.AsRegister<Register>(); @@ -552,7 +561,11 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddConstant(index_reg, index_reg, offset_); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. 
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -792,6 +805,9 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, boot_image_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_address_patches_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. @@ -4286,11 +4302,11 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location out_loc = locations->Out(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -4304,7 +4320,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -4318,7 +4333,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -4332,7 +4346,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -4346,7 +4359,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimInt: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -4363,7 +4375,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { @@ -4398,7 +4409,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -4411,7 +4421,6 @@ void 
InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); SRegister out = out_loc.AsFpuRegister<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -4424,7 +4433,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -5077,13 +5085,71 @@ void ParallelMoveResolverARM::RestoreScratch(int reg) { __ Pop(static_cast<Register>(reg)); } +HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + if (kEmitCompilerReadBarrier) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageAddress: + // TODO: Implement for read barrier. + return HLoadClass::LoadKind::kDexCacheViaMethod; + default: + break; + } + } + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods + // with irreducible loops. + if (GetGraph()->HasIrreducibleLoops()) { + return HLoadClass::LoadKind::kDexCacheViaMethod; + } + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + return desired_class_load_kind; +} + void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(R0), - /* code_generator_supports_read_barrier */ true); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(R0), + /* code_generator_supports_read_barrier */ true); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || + load_kind == HLoadClass::LoadKind::kDexCachePcRelative) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { @@ -5100,37 +5166,97 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - // /* GcRoot<mirror::Class>[] */ out = - // current_method.ptr_sized_fields_->dex_cache_resolved_types_ - __ LoadFromOffset(kLoadWord, - out, - current_method, - ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } + + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + Register current_method = locations->InAt(0).AsRegister<Register>(); + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorARM::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(out, out, ShifterOperand(PC)); + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 
0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives + // a 128B range. To try and reduce the number of literals if we load multiple types, + // simply split the dex cache address to a 128B aligned base loaded from a literal + // and the remaining offset embedded in the load. + static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(cls->GetAddress(), 4u); + constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2; + uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits); + uint32_t offset = address & MaxInt<uint32_t>(offset_bits); + __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + Register base_reg = locations->InAt(0).AsRegister<Register>(); + HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase(); + int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset(); + // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + Register current_method = locations->InAt(0).AsRegister<Register>(); + __ LoadFromOffset(kLoadWord, + out, + current_method, + ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -5262,6 +5388,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits); uint32_t offset = address & MaxInt<uint32_t>(offset_bits); __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) GenerateGcRootFieldLoad(load, out_loc, out, offset); break; } @@ -5269,6 +5396,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register base_reg = locations->InAt(0).AsRegister<Register>(); HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase(); int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset(); + // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) GenerateGcRootFieldLoad(load, out_loc, base_reg, offset); break; } @@ -6085,8 +6213,9 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr // /* HeapReference<Object> 
*/ ref = *(obj + offset) Location no_index = Location::NoLocation(); + ScaleFactor no_scale_factor = TIMES_1; GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, temp, needs_null_check); + instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check); } void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6099,10 +6228,14 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, temp, needs_null_check); + instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6110,6 +6243,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Register obj, uint32_t offset, Location index, + ScaleFactor scale_factor, Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); @@ -6164,17 +6298,22 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. if (index.IsValid()) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ ref = - // *(obj + offset + index * sizeof(HeapReference<Object>)) + // Load types involving an "index": ArrayGet and + // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset; + (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + // Handle the special case of the + // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use + // a register pair as index ("long offset"), of which only the low + // part contains data. + Register index_reg = index.IsRegisterPair() + ? 
index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor)); __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); } } else { @@ -6452,6 +6591,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatc return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); } +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch( + const DexFile& dex_file, uint32_t type_index) { + return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_); +} + CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -6470,6 +6614,13 @@ Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_ [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); } +Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + uint32_t type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) { bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_; @@ -6489,6 +6640,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() + + boot_image_type_patches_.size() + + /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { @@ -6564,6 +6717,35 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche add_pc_offset, string_index)); } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = literal->GetLabel()->Position(); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { + const DexFile& dex_file = info.target_dex_file; + uint32_t type_index = info.offset_or_index; + DCHECK(info.add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); + // Add MOVW patch. + DCHECK(info.movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); + linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset, + &dex_file, + add_pc_offset, + type_index)); + // Add MOVT patch. 
+ DCHECK(info.movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); + linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset, + &dex_file, + add_pc_offset, + type_index)); + } for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); Literal* literal = entry.second; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 0020f7b4f4..477c4f18c1 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -24,6 +24,7 @@ #include "parallel_move_resolver.h" #include "utils/arm/assembler_thumb2.h" #include "utils/string_reference.h" +#include "utils/type_reference.h" namespace art { namespace arm { @@ -407,6 +408,11 @@ class CodeGeneratorARM : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( @@ -419,10 +425,10 @@ class CodeGeneratorARM : public CodeGenerator { void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings. The only difference is the interpretation of the offset_or_index. - // The PC-relative address is loaded with three instructions, MOVW+MOVT - // to load the offset to base_reg and then ADD base_reg, PC. The offset is - // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we + // and boot image strings/types. The only difference is the interpretation of the + // offset_or_index. The PC-relative address is loaded with three instructions, + // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset + // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we // currently emit these 3 instructions together, instruction scheduling could // split this sequence apart, so we keep separate labels for each of them. struct PcRelativePatchInfo { @@ -431,7 +437,7 @@ class CodeGeneratorARM : public CodeGenerator { PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; const DexFile& target_dex_file; - // Either the dex cache array element offset or the string index. + // Either the dex cache array element offset or the string/type index. 
uint32_t offset_or_index; Label movw_label; Label movt_label; @@ -439,9 +445,11 @@ class CodeGeneratorARM : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index); + Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); Literal* DeduplicateDexCacheAddressLiteral(uint32_t address); @@ -464,6 +472,16 @@ class CodeGeneratorARM : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -519,16 +537,6 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction); private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - Location temp, - bool needs_null_check); - Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; @@ -536,6 +544,9 @@ class CodeGeneratorARM : public CodeGenerator { using BootStringToLiteralMap = ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + Literal*, + TypeReferenceValueComparator>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); @@ -568,6 +579,10 @@ class CodeGeneratorARM : public CodeGenerator { BootStringToLiteralMap boot_image_string_patches_; // PC-relative String patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // Deduplication map for patchable boot image addresses. 
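As a concrete reading of the PcRelativePatchInfo comment above, here is a sketch of the link-time arithmetic it implies (variable names are made up; this is not code from the patch):

    // On Thumb2 the effective PC of `add base_reg, pc` is the ADD's address + 4,
    // so the MOVW/MOVT placeholders are patched to satisfy
    //   target_address == add_pc_offset + 4 + ((movt_imm << 16) | movw_imm)
    uint32_t diff     = target_address - (add_pc_offset + 4u);
    uint16_t movw_imm = diff & 0xFFFFu;          // patched at movw_label
    uint16_t movt_imm = (diff >> 16) & 0xFFFFu;  // patched at movt_label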
Uint32ToLiteralMap boot_image_address_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b63a3d4c1a..c8d33d5743 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -237,8 +237,11 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { codegen->EmitParallelMoves( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - arm64_codegen->InvokeRuntime( - QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -595,7 +598,9 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -658,8 +663,12 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { Primitive::Type type = Primitive::kPrimNot; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -677,7 +686,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); @@ -725,7 +734,11 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ Add(index_reg, index_reg, Operand(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. 
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -920,6 +933,9 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, boot_image_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_address_patches_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. @@ -2051,8 +2067,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Register obj = InputRegisterAt(instruction, 0); LocationSummary* locations = instruction->GetLocations(); Location index = locations->InAt(1); - uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); Location out = locations->Out(); + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); @@ -3725,6 +3741,12 @@ vixl::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(const DexFile& dex_fil return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_); } +vixl::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(const DexFile& dex_file, + uint32_t type_index, + vixl::Label* adrp_label) { + return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_); +} + vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset, vixl::Label* adrp_label) { @@ -3751,6 +3773,13 @@ vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral( [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); } +vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral( + const DexFile& dex_file, uint32_t type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); +} + vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) { bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); Uint32ToLiteralMap* map = needs_patch ? 
&boot_image_address_patches_ : &uint32_literals_; @@ -3770,6 +3799,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + pc_relative_string_patches_.size() + + boot_image_type_patches_.size() + + pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { @@ -3810,6 +3841,19 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc info.pc_insn_label->location(), info.offset_or_index)); } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + vixl::Literal<uint32_t>* literal = entry.second; + linker_patches->push_back(LinkerPatch::TypePatch(literal->offset(), + target_type.dex_file, + target_type.type_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { + linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.location(), + &info.target_dex_file, + info.pc_insn_label->location(), + info.offset_or_index)); + } for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); vixl::Literal<uint32_t>* literal = entry.second; @@ -3875,13 +3919,63 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } +HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + if (kEmitCompilerReadBarrier) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageAddress: + // TODO: Implement for read barrier. + return HLoadClass::LoadKind::kDexCacheViaMethod; + default: + break; + } + } + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + return desired_class_load_kind; +} + void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0), - /* code_generator_supports_read_barrier */ true); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + LocationFrom(calling_convention.GetRegisterAt(0)), + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3897,35 +3991,111 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); - Register current_method = InputRegisterAt(cls, 0); - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); - // /* GcRoot<mirror::Class>[] */ out = - // current_method.ptr_sized_fields_->dex_cache_resolved_types_ - __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad( - cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ Cbz(out, slow_path->GetEntryLabel()); + + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + Register current_method = InputRegisterAt(cls, 0); + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + // Add ADRP with its PC-relative type patch. + const DexFile& dex_file = cls->GetDexFile(); + uint32_t type_index = cls->GetTypeIndex(); + vixl::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + { + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(adrp_label); + __ adrp(out.X(), /* offset placeholder */ 0); } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + // Add ADD with its PC-relative type patch. 
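A sketch of how the ADRP/ADD pair emitted for kBootImageLinkTimePcRelative resolves once patched (illustration only; variable names are made up, and the page arithmetic follows the ARM64 ADRP definition):

    // ADRP yields a 4KiB page base; the paired ADD supplies the low 12 bits.
    uint64_t page_base = (pc_of_adrp & ~UINT64_C(0xFFF)) + (adrp_imm21 << 12);
    uint64_t patched   = page_base + add_imm12;  // == address of the boot image type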
+ vixl::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + { + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(add_label); + __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); + } + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress())); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress())); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 0u); + // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads + // that gives a 16KiB range. To try and reduce the number of literals if we load + // multiple types, simply split the dex cache address to a 16KiB aligned base + // loaded from a literal and the remaining offset embedded in the load. + static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(cls->GetAddress(), 4u); + constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2; + uint64_t base_address = cls->GetAddress() & ~MaxInt<uint64_t>(offset_bits); + uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits); + __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + // Add ADRP with its PC-relative DexCache access patch. + const DexFile& dex_file = cls->GetDexFile(); + uint32_t element_offset = cls->GetDexCacheElementOffset(); + vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); + { + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(adrp_label); + __ adrp(out.X(), /* offset placeholder */ 0); } + // Add LDR with its PC-relative DexCache access patch. 
+ vixl::Label* ldr_label = + codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ + GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + MemberOffset resolved_types_offset = + ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + Register current_method = InputRegisterAt(cls, 0); + __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); + generate_null_check = !cls->IsInDexCache(); + break; + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -4046,6 +4216,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits); uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits); __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) GenerateGcRootFieldLoad(load, out_loc, out.X(), offset); break; } @@ -4062,6 +4233,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { // Add LDR with its PC-relative DexCache access patch. vixl::Label* ldr_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); break; } @@ -4940,8 +5112,16 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire); + size_t no_scale_factor = 0U; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + offset, + no_index, + no_scale_factor, + temp, + needs_null_check, + use_load_acquire); } void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -4958,10 +5138,21 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // never use Load-Acquire instructions on ARM64. 
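Worked example of the base/offset split used by the ARM64 kDexCacheAddress case above (the address is made up; the ARM code earlier applies the same idea with a 128B window):

    uint64_t address      = 0x71234568;                   // some GcRoot<Class> slot
    uint64_t base_address = address & ~UINT64_C(0x3FFF);  // 0x71234000, 16KiB aligned, one literal
    uint32_t offset       = address &  UINT64_C(0x3FFF);  // 0x568, fits the scaled 12-bit LDR offset
    // Slots in the same 16KiB window share the deduplicated base literal and
    // differ only in the offset folded into the LDR.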
const bool use_load_acquire = false; + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire); + size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + data_offset, + index, + scale_factor, + temp, + needs_null_check, + use_load_acquire); } void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, @@ -4969,15 +5160,16 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* vixl::Register obj, uint32_t offset, Location index, + size_t scale_factor, Register temp, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // If `index` is a valid location, then we are emitting an array - // load, so we shouldn't be using a Load Acquire instruction. - // In other words: `index.IsValid()` => `!use_load_acquire`. - DCHECK(!index.IsValid() || !use_load_acquire); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. + DCHECK(!instruction->IsArrayGet() || !use_load_acquire); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); @@ -5034,20 +5226,33 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The actual reference load. if (index.IsValid()) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ ref = - // *(obj + offset + index * sizeof(HeapReference<Object>)) - const size_t shift_amount = Primitive::ComponentSizeShift(type); - if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64ConstantFrom(index) << shift_amount); - Load(type, ref_reg, HeapOperand(obj, computed_offset)); + // Load types involving an "index". + if (use_load_acquire) { + // UnsafeGetObjectVolatile intrinsic case. + // Register `index` is not an index in an object array, but an + // offset to an object reference field within object `obj`. + DCHECK(instruction->IsInvoke()) << instruction->DebugName(); + DCHECK(instruction->GetLocations()->Intrinsified()); + DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) + << instruction->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset, 0U); + DCHECK_EQ(scale_factor, 0U); + DCHECK_EQ(needs_null_check, 0U); + // /* HeapReference<Object> */ ref = *(obj + index) + MemOperand field = HeapOperand(obj, XRegisterFrom(index)); + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); } else { - temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, shift_amount)); - temps.Release(temp2); + // ArrayGet and UnsafeGetObject intrinsics cases. 
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + if (index.IsConstant()) { + uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); + Load(type, ref_reg, HeapOperand(obj, computed_offset)); + } else { + temp2 = temps.AcquireW(); + __ Add(temp2, obj, offset); + Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp2); + } } } else { // /* HeapReference<Object> */ ref = *(obj + offset) diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 422963e7d0..d4bf695602 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -26,6 +26,7 @@ #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" #include "utils/string_reference.h" +#include "utils/type_reference.h" #include "vixl/a64/disasm-a64.h" #include "vixl/a64/macro-assembler-a64.h" @@ -460,6 +461,11 @@ class CodeGeneratorARM64 : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( @@ -482,6 +488,14 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t string_index, vixl::Label* adrp_label = nullptr); + // Add a new PC-relative type patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::Label* NewPcRelativeTypePatch(const DexFile& dex_file, + uint32_t type_index, + vixl::Label* adrp_label = nullptr); + // Add a new PC-relative dex cache array patch for an instruction and return // the label to be bound before the instruction. The instruction will be // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` @@ -492,6 +506,8 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index); + vixl::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + uint32_t type_index); vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address); @@ -515,6 +531,17 @@ class CodeGeneratorARM64 : public CodeGenerator { Location index, vixl::Register temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + size_t scale_factor, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
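The scale_factor parameter threaded through the Baker read-barrier helpers above replaces the previously hard-coded reference scaling; a small sketch of the two address shapes it has to cover (illustrative, using plain integers):

    // ArrayGet of a reference element: `index` is an element index.
    size_t scale      = Primitive::ComponentSizeShift(Primitive::kPrimNot);  // 2
    uintptr_t element = obj + data_offset + (index << scale);
    // UnsafeGetObject/UnsafeGetObjectVolatile: the "index" is already a byte
    // offset into the object, so the intrinsic passes offset 0 and scale 0
    // (the DCHECKs above show this for the volatile, load-acquire path).
    uintptr_t field   = obj + 0u + (byte_offset << 0);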
@@ -570,17 +597,6 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction); private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::Register obj, - uint32_t offset, - Location index, - vixl::Register temp, - bool needs_null_check, - bool use_load_acquire); - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, @@ -589,6 +605,9 @@ class CodeGeneratorARM64 : public CodeGenerator { using BootStringToLiteralMap = ArenaSafeMap<StringReference, vixl::Literal<uint32_t>*, StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + vixl::Literal<uint32_t>*, + TypeReferenceValueComparator>; vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); @@ -598,13 +617,14 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method); // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings. The only difference is the interpretation of the offset_or_index. + // and boot image strings/types. The only difference is the interpretation of the + // offset_or_index. struct PcRelativePatchInfo { PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { } const DexFile& target_dex_file; - // Either the dex cache array element offset or the string index. + // Either the dex cache array element offset or the string/type index. uint32_t offset_or_index; vixl::Label label; vixl::Label* pc_insn_label; @@ -646,6 +666,10 @@ class CodeGeneratorARM64 : public CodeGenerator { BootStringToLiteralMap boot_image_string_patches_; // PC-relative String patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // Deduplication map for patchable boot image addresses. Uint32ToLiteralMap boot_image_address_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index c8e927d026..810db20888 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -166,11 +166,15 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + mips_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this, IsDirectEntrypoint(kQuickThrowArrayBounds)); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -1635,11 +1639,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -1653,7 +1657,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -1667,7 +1670,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -1682,7 +1684,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -1699,7 +1700,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -1714,7 +1714,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Register out = locations->Out().AsRegisterPairLow<Register>(); if (index.IsConstant()) { size_t offset = @@ -1729,7 +1728,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); FRegister out = locations->Out().AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = @@ -1744,7 +1742,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); FRegister out = locations->Out().AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = @@ -3769,6 +3766,13 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( return HLoadString::LoadKind::kDexCacheViaMethod; } +HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( + HLoadClass::LoadKind 
desired_class_load_kind) { + DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); + // TODO: Implement other kinds. + return HLoadClass::LoadKind::kDexCacheViaMethod; +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { @@ -4726,7 +4730,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4735,7 +4738,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register src = locations->InAt(0).AsRegister<Register>(); - __ Move(dst_low, src); + if (dst_low != src) { + __ Move(dst_low, src); + } __ Sra(dst_high, src, 31); } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { Register dst = locations->Out().AsRegister<Register>(); @@ -4764,7 +4769,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } break; case Primitive::kPrimInt: - __ Move(dst, src); + if (dst != src) { + __ Move(dst, src); + } break; default: @@ -4921,11 +4928,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); __ LoadConst32(TMP, High32Bits(min_val)); __ Mtc1(ZERO, FTMP); - if (fpu_32bit) { - __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); - } else { - __ Mthc1(TMP, FTMP); - } + __ MoveToFpuHigh(TMP, FTMP); } if (isR6) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 8c0bae628e..6487f28ad5 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -349,6 +349,11 @@ class CodeGeneratorMIPS : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 8d5dc84df9..9f2664c0a5 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -127,10 +127,14 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + mips64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -918,13 +922,13 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters() const { // TODO: review; anything else? - // TODO: remove once all the issues with register saving/restoring are sorted out. - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } } } @@ -1289,11 +1293,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1307,7 +1311,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1321,7 +1324,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1336,7 +1338,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1353,7 +1354,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); LoadOperandType load_type = (type == Primitive::kPrimNot) ? 
kLoadUnsignedWord : kLoadWord; if (index.IsConstant()) { @@ -1369,7 +1369,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1384,7 +1383,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -1399,7 +1397,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2986,19 +2983,6 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in } HandleInvoke(invoke); - - // While SetupBlockedRegisters() blocks registers S2-S8 due to their - // clobbering somewhere else, reduce further register pressure by avoiding - // allocation of a register for the current method pointer like on x86 baseline. - // TODO: remove this once all the issues with register saving/restoring are - // sorted out. - if (invoke->HasCurrentMethodInput()) { - LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetSpecialInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); - } - } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { @@ -3016,6 +3000,13 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( return HLoadString::LoadKind::kDexCacheViaMethod; } +HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); + // TODO: Implement other kinds. + return HLoadClass::LoadKind::kDexCacheViaMethod; +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 9785a2e8a8..4b462cc800 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -340,6 +340,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 9d0092b674..be20f1f7cc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -148,10 +148,14 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + x86_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -444,7 +448,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -507,8 +513,12 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -521,7 +531,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = index_.AsRegister<Register>(); @@ -569,7 +579,11 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddImmediate(index_reg, Immediate(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. 
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -804,6 +818,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), method_address_offset_(-1) { @@ -4452,6 +4467,11 @@ void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) { __ Bind(&string_patches_.back().label); } +void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) { + type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex()); + __ Bind(&type_patches_.back().label); +} + Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add the patch entry and bind its label at the end of the instruction. @@ -4466,7 +4486,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + simple_patches_.size() + - string_patches_.size(); + string_patches_.size() + + type_patches_.size(); linker_patches->reserve(size); // The label points to the end of the "movl" insn but the literal offset for method // patch needs to point to the embedded constant which occupies the last 4 bytes. @@ -4502,6 +4523,13 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche GetMethodAddressOffset(), info.string_index)); } + for (const TypePatchInfo<Label>& info : type_patches_) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, + &info.dex_file, + GetMethodAddressOffset(), + info.type_index)); + } } else { for (const StringPatchInfo<Label>& info : string_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; @@ -4509,6 +4537,12 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche &info.dex_file, info.string_index)); } + for (const TypePatchInfo<Label>& info : type_patches_) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + &info.dex_file, + info.type_index)); + } } } @@ -5040,11 +5074,11 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location out_loc = locations->Out(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, @@ -5056,7 +5090,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - uint32_t data_offset = 
mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, @@ -5068,7 +5101,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, @@ -5080,7 +5112,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, @@ -5092,7 +5123,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimInt: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movl(out, Address(obj, @@ -5107,7 +5137,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { @@ -5141,7 +5170,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -5159,7 +5187,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movss(out, Address(obj, @@ -5171,7 +5198,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movsd(out, Address(obj, @@ -5873,13 +5899,72 @@ void ParallelMoveResolverX86::RestoreScratch(int reg) { __ popl(static_cast<Register>(reg)); } +HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + if (kEmitCompilerReadBarrier) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageAddress: + // TODO: Implement for read barrier. 
+ return HLoadClass::LoadKind::kDexCacheViaMethod; + default: + break; + } + } + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + FALLTHROUGH_INTENDED; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT. + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods + // with irreducible loops. + if (GetGraph()->HasIrreducibleLoops()) { + return HLoadClass::LoadKind::kDexCacheViaMethod; + } + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + return desired_class_load_kind; +} + void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(EAX), - /* code_generator_supports_read_barrier */ true); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(EAX), + /* code_generator_supports_read_barrier */ true); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || + load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadClass::LoadKind::kDexCachePcRelative) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { @@ -5896,39 +5981,86 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - } else { - // /* GcRoot<mirror::Class>[] */ out = - // current_method.ptr_sized_fields_->dex_cache_resolved_types_ - __ movl(out, Address(current_method, - ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); - // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad( - cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - } + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + Register current_method = locations->InAt(0).AsRegister<Register>(); + GenerateGcRootFieldLoad( + cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { + DCHECK(!kEmitCompilerReadBarrier); + __ movl(out, Immediate(/* placeholder */ 0)); + codegen_->RecordTypePatch(cls); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordTypePatch(cls); + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ movl(out, Immediate(address)); + codegen_->RecordSimplePatch(); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + // /* GcRoot<mirror::Class> */ out = *address + GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address)); + generate_null_check = !cls->IsInDexCache(); + break; + } + case 
HLoadClass::LoadKind::kDexCachePcRelative: { + Register base_reg = locations->InAt(0).AsRegister<Register>(); + uint32_t offset = cls->GetDexCacheElementOffset(); + Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset); + // /* GcRoot<mirror::Class> */ out = *(base + offset) /* PC-relative */ + GenerateGcRootFieldLoad( + cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + Register current_method = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); + generate_null_check = !cls->IsInDexCache(); + break; + } + } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + + if (generate_null_check) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -6045,6 +6177,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kDexCacheAddress: { DCHECK_NE(load->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + // /* GcRoot<mirror::String> */ out = *address GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address)); break; } @@ -6052,6 +6185,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { Register base_reg = locations->InAt(0).AsRegister<Register>(); uint32_t offset = load->GetDexCacheElementOffset(); Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); + // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */ GenerateGcRootFieldLoad( load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); break; @@ -6853,6 +6987,9 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) Address src = index.IsConstant() ? diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 98dc8ca280..2a9fb80995 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -391,6 +391,11 @@ class CodeGeneratorX86 : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. 
If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( @@ -405,6 +410,7 @@ class CodeGeneratorX86 : public CodeGenerator { void RecordSimplePatch(); void RecordStringPatch(HLoadString* load_string); + void RecordTypePatch(HLoadClass* load_class); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -485,6 +491,14 @@ class CodeGeneratorX86 : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + const Address& src, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -555,15 +569,6 @@ class CodeGeneratorX86 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - const Address& src, - Location temp, - bool needs_null_check); - Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); struct PcRelativeDexCacheAccessInfo { @@ -594,6 +599,8 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<Label> simple_patches_; // String patch locations. ArenaDeque<StringPatchInfo<Label>> string_patches_; + // Type patch locations. + ArenaDeque<TypePatchInfo<Label>> type_patches_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index a8da5f2ea5..cac33cddb8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -204,10 +204,14 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
QUICK_ENTRY_POINT(pThrowStringBounds) + : QUICK_ENTRY_POINT(pThrowArrayBounds); + x86_64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } @@ -465,7 +469,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -528,8 +534,12 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { CpuRegister reg_out = out_.AsRegister<CpuRegister>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -542,7 +552,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute real offset and store it in index_. Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); @@ -590,7 +600,11 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -894,6 +908,11 @@ void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) { __ Bind(&string_patches_.back().label); } +void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) { + type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex()); + __ Bind(&type_patches_.back().label); +} + Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add a patch entry and return the label. 
@@ -908,7 +927,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + simple_patches_.size() + - string_patches_.size(); + string_patches_.size() + + type_patches_.size(); linker_patches->reserve(size); // The label points to the end of the "movl" insn but the literal offset for method // patch needs to point to the embedded constant which occupies the last 4 bytes. @@ -944,6 +964,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat info.label.Position(), info.string_index)); } + for (const TypePatchInfo<Label>& info : type_patches_) { + // These are always PC-relative, see GetSupportedLoadClassKind(). + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, + &info.dex_file, + info.label.Position(), + info.type_index)); + } } void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -1023,6 +1051,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -4540,11 +4569,11 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location out_loc = locations->Out(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, @@ -4556,7 +4585,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, @@ -4568,7 +4596,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, @@ -4580,7 +4607,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, @@ -4592,7 +4618,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimInt: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movl(out, Address(obj, @@ -4607,7 +4632,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* 
instruction) { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { @@ -4641,7 +4665,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movq(out, Address(obj, @@ -4653,7 +4676,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movss(out, Address(obj, @@ -4665,7 +4687,6 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movsd(out, Address(obj, @@ -5317,13 +5338,64 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. } +HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + if (kEmitCompilerReadBarrier) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageAddress: + // TODO: Implement for read barrier. + return HLoadClass::LoadKind::kDexCacheViaMethod; + default: + break; + } + } + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + // We prefer the always-available RIP-relative address for the x86-64 boot image. 
+ return HLoadClass::LoadKind::kBootImageLinkTimePcRelative; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + return desired_class_load_kind; +} + void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(RAX), - /* code_generator_supports_read_barrier */ true); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(RAX), + /* code_generator_supports_read_barrier */ true); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { @@ -5340,37 +5412,86 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - } else { - // /* GcRoot<mirror::Class>[] */ out = - // current_method.ptr_sized_fields_->dex_cache_resolved_types_ - __ movq(out, Address(current_method, - ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); - // /* GcRoot<mirror::Class> */ out = out[type_index] - GenerateGcRootFieldLoad( - cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + CpuRegister current_method = 
locations->InAt(0).AsRegister<CpuRegister>(); + GenerateGcRootFieldLoad( + cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(!kEmitCompilerReadBarrier); + __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordTypePatch(cls); + break; + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ movl(out, Immediate(address)); // Zero-extended. + codegen_->RecordSimplePatch(); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 0u); + // /* GcRoot<mirror::Class> */ out = *address + if (IsUint<32>(cls->GetAddress())) { + Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true); + GenerateGcRootFieldLoad(cls, out_loc, address); } else { - __ Bind(slow_path->GetExitLabel()); + // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). + __ movq(out, Immediate(cls->GetAddress())); + GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0)); } + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + uint32_t offset = cls->GetDexCacheElementOffset(); + Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset); + Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, + /* no_rip */ false); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); + __ movq(out, + Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); + generate_null_check = !cls->IsInDexCache(); + break; + } + default: + LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind(); + UNREACHABLE(); + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -5461,6 +5582,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { } case HLoadString::LoadKind::kDexCacheAddress: { DCHECK_NE(load->GetAddress(), 0u); + // /* GcRoot<mirror::String> */ out = *address if (IsUint<32>(load->GetAddress())) { Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true); GenerateGcRootFieldLoad(load, out_loc, address); @@ -5476,6 +5598,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), 
offset); Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false); + // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); break; } @@ -6317,6 +6440,9 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) Address src = index.IsConstant() ? diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 7cf12459b0..d7cfd37c33 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -388,6 +388,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { HLoadString::LoadKind GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( @@ -400,6 +405,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void RecordSimplePatch(); void RecordStringPatch(HLoadString* load_string); + void RecordTypePatch(HLoadClass* load_class); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -427,6 +433,14 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -529,15 +543,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - CpuRegister obj, - const Address& src, - Location temp, - bool needs_null_check); - struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) : target_dex_file(dex_file), element_offset(element_off), label() { } @@ -569,6 +574,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<Label> simple_patches_; // String patch locations. ArenaDeque<StringPatchInfo<Label>> string_patches_; + // Type patch locations. + ArenaDeque<TypePatchInfo<Label>> type_patches_; // Fixups for jump tables need to be handled specially. 
ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index e9072b9c77..14c318e21f 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -44,8 +44,23 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } private: + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + // If this is a load with PC-relative access to the dex cache types array, + // we need to add the dex cache arrays base as the special input. + if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) { + // Initialize base for target dex file if needed. + const DexFile& dex_file = load_class->GetDexFile(); + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kArmPointerSize, &dex_file); + base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex())); + // Add the special argument base to the load. + load_class->AddSpecialInput(base); + } + } + void VisitLoadString(HLoadString* load_string) OVERRIDE { - // If this is a load with PC-relative access to the dex cache methods array, + // If this is a load with PC-relative access to the dex cache strings array, // we need to add the dex cache arrays base as the special input. if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { // Initialize base for target dex file if needed. diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 3084a4ff2b..4af8d1985b 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -372,6 +372,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + StartAttributeStream("load_kind") << load_class->GetLoadKind(); + const char* descriptor = load_class->GetDexFile().GetTypeDescriptor( + load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex())); + StartAttributeStream("class_name") << PrettyDescriptor(descriptor); StartAttributeStream("gen_clinit_check") << std::boolalpha << load_class->MustGenerateClinitCheck() << std::noboolalpha; StartAttributeStream("needs_access_check") << std::boolalpha @@ -399,6 +403,16 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << array_length->IsStringLength() << std::noboolalpha; } + void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { + StartAttributeStream("is_string_char_at") << std::boolalpha + << bounds_check->IsStringCharAt() << std::noboolalpha; + } + + void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + StartAttributeStream("is_string_char_at") << std::boolalpha + << array_get->IsStringCharAt() << std::noboolalpha; + } + void VisitArraySet(HArraySet* array_set) OVERRIDE { StartAttributeStream("value_can_be_null") << std::boolalpha << array_set->GetValueCanBeNull() << std::noboolalpha; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 0a5cf80e9d..52426d73c6 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -670,7 +670,7 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, // an unsigned entity, for example, as in the following loop that uses the full range: // for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX // (2) The TC is only valid if 
the loop is taken, otherwise TC = 0, as in: - // for (int i = 12; i < U; i++) // TC = 0 when U < 12 + // for (int i = 12; i < U; i++) // TC = 0 when U <= 12 // If this cannot be determined at compile-time, the TC is only valid within the // loop-body proper, not the loop-header unless enforced with an explicit taken-test. // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in: diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 27b6896150..8f2db3d1d3 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -756,7 +756,15 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho invoke_instruction->ReplaceWith(return_replacement); } invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); - FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp); + FixUpReturnReferenceType(method, return_replacement); + if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) { + // Actual return value has a more specific type than the method's declared + // return type. Run RTP again on the outer graph to propagate it. + ReferenceTypePropagation(graph_, + outer_compilation_unit_.GetDexCache(), + handles_, + /* is_first_run */ false).Run(); + } return true; } @@ -1159,6 +1167,15 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } } + // We have replaced formal arguments with actual arguments. If actual types + // are more specific than the declared ones, run RTP again on the inner graph. + if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { + ReferenceTypePropagation(callee_graph, + dex_compilation_unit.GetDexCache(), + handles_, + /* is_first_run */ false).Run(); + } + size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; size_t number_of_inlined_instructions = RunOptimizations(callee_graph, code_item, dex_compilation_unit); @@ -1332,13 +1349,87 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, return number_of_inlined_instructions; } -void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - HInstruction* return_replacement, - bool do_rtp) { +static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, + bool declared_can_be_null, + HInstruction* actual_obj) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (declared_can_be_null && !actual_obj->CanBeNull()) { + return true; + } + + ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); + return (actual_rti.IsExact() && !declared_rti.IsExact()) || + declared_rti.IsStrictSupertypeOf(actual_rti); +} + +ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) { + return ReferenceTypePropagation::IsAdmissible(klass) + ? ReferenceTypeInfo::Create(handles_->NewHandle(klass)) + : graph_->GetInexactObjectRti(); +} + +bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) { + // If this is an instance call, test whether the type of the `this` argument + // is more specific than the class which declares the method. 
+ if (!resolved_method->IsStatic()) { + if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()), + /* declared_can_be_null */ false, + invoke_instruction->InputAt(0u))) { + return true; + } + } + + size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + + // Iterate over the list of parameter types and test whether any of the + // actual inputs has a more specific reference type than the type declared in + // the signature. + const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList(); + for (size_t param_idx = 0, + input_idx = resolved_method->IsStatic() ? 0 : 1, + e = (param_list == nullptr ? 0 : param_list->Size()); + param_idx < e; + ++param_idx, ++input_idx) { + HInstruction* input = invoke_instruction->InputAt(input_idx); + if (input->GetType() == Primitive::kPrimNot) { + mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType( + param_list->GetTypeItem(param_idx).type_idx_, + pointer_size); + if (IsReferenceTypeRefinement(GetClassRTI(param_cls), + /* declared_can_be_null */ true, + input)) { + return true; + } + } + } + + return false; +} + +bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, + HInstruction* return_replacement) { // Check the integrity of reference types and run another type propagation if needed. if (return_replacement != nullptr) { if (return_replacement->GetType() == Primitive::kPrimNot) { + // Test if the return type is a refinement of the declared return type. + if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), + /* declared_can_be_null */ true, + return_replacement)) { + return true; + } + } else if (return_replacement->IsInstanceOf()) { + // Inlining InstanceOf into an If may put a tighter bound on reference types. + return true; + } + } + + return false; +} + +void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method, + HInstruction* return_replacement) { + if (return_replacement != nullptr) { + if (return_replacement->GetType() == Primitive::kPrimNot) { if (!return_replacement->GetReferenceTypeInfo().IsValid()) { // Make sure that we have a valid type for the return. We may get an invalid one when // we inline invokes with multiple branches and create a Phi for the result. @@ -1347,36 +1438,7 @@ void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction, DCHECK(return_replacement->IsPhi()); size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size); - if (cls != nullptr && !cls->IsErroneous()) { - ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls); - return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( - return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); - } else { - // Return inexact object type on failures. - return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti()); - } - } - - if (do_rtp) { - // If the return type is a refinement of the declared type run the type propagation again. 
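The refinement helpers above (IsReferenceTypeRefinement, ArgumentTypesMoreSpecific, ReturnTypeMoreSpecific) all reduce to one question: does the actual value carry strictly more type information than the declaration? A minimal, self-contained sketch of that predicate follows; it is illustrative only (not ART code), and a single-inheritance depth stands in for IsStrictSupertypeOf:

    // Illustrative sketch only: the three refinement conditions.
    #include <cassert>

    struct MockRti {
      int depth;      // depth in a single-inheritance chain; deeper means more derived
      bool is_exact;  // the type is known exactly, not merely as an upper bound
    };

    bool IsRefinement(MockRti declared, bool declared_can_be_null,
                      MockRti actual, bool actual_can_be_null) {
      if (declared_can_be_null && !actual_can_be_null) return true;  // null-ness tightened
      if (actual.is_exact && !declared.is_exact) return true;        // exactness gained
      return actual.depth > declared.depth;                          // strict subtype
    }

    int main() {
      MockRti object{0, false};
      MockRti string{3, true};
      assert(IsRefinement(object, true, string, false));    // String refines Object
      assert(!IsRefinement(string, false, string, false));  // same info, no refinement
      return 0;
    }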
- ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); - ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); - if (invoke_rti.IsStrictSupertypeOf(return_rti) - || (return_rti.IsExact() && !invoke_rti.IsExact()) - || !return_replacement->CanBeNull()) { - ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetDexCache(), - handles_, - /* is_first_run */ false).Run(); - } - } - } else if (return_replacement->IsInstanceOf()) { - if (do_rtp) { - // Inlining InstanceOf into an If may put a tighter bound on reference types. - ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetDexCache(), - handles_, - /* is_first_run */ false).Run(); + return_replacement->SetReferenceTypeInfo(GetClassRTI(cls)); } } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 7cf1424b6d..02d3a5f499 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -124,10 +124,18 @@ class HInliner : public HOptimization { uint32_t dex_pc) const SHARED_REQUIRES(Locks::mutator_lock_); - void FixUpReturnReferenceType(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - HInstruction* return_replacement, - bool do_rtp) + void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) + SHARED_REQUIRES(Locks::mutator_lock_); + + // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is + // admissible (see ReferenceTypePropagation::IsAdmissible for details). + // Otherwise returns inexact Object RTI. + ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + + bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) + SHARED_REQUIRES(Locks::mutator_lock_); + + bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement) SHARED_REQUIRES(Locks::mutator_lock_); // Add a type guard on the given `receiver`. 
This will add to the graph: diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index f2286e46e6..b4125299ea 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -933,7 +933,7 @@ bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) IsOutermostCompilingClass(type_index), dex_pc, needs_access_check, - compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, type_index)); + /* is_in_dex_cache */ false); AppendInstruction(load_class); HInstruction* cls = load_class; @@ -1024,7 +1024,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( is_outer_class, dex_pc, /*needs_access_check*/ false, - compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index)); + /* is_in_dex_cache */ false); AppendInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); AppendInstruction(clinit_check); @@ -1376,15 +1376,13 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - bool is_in_cache = - compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, outer_dex_file, is_outer_class, dex_pc, /*needs_access_check*/ false, - is_in_cache); + /* is_in_dex_cache */ false); AppendInstruction(constant); HInstruction* cls = constant; @@ -1653,7 +1651,7 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, IsOutermostCompilingClass(type_index), dex_pc, !can_access, - compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index)); + /* is_in_dex_cache */ false); AppendInstruction(cls); TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class); @@ -2621,8 +2619,6 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); bool can_access = compiler_driver_->CanAccessTypeWithoutChecks( dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index); - bool is_in_dex_cache = - compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index); AppendInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, @@ -2630,7 +2626,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, IsOutermostCompilingClass(type_index), dex_pc, !can_access, - is_in_dex_cache)); + /* is_in_dex_cache */ false)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index eb1d1560db..3041c4d2c7 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -101,6 +101,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type); void SimplifyIsNaN(HInvoke* invoke); void SimplifyFP2Int(HInvoke* invoke); + void SimplifyStringCharAt(HInvoke* invoke); void SimplifyStringIsEmptyOrLength(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); @@ -235,22 +236,40 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); - 
HConstant* input_cst = instruction->GetConstantRight(); - HInstruction* input_other = instruction->GetLeastConstantLeft(); + HInstruction* shift_amount = instruction->GetRight(); + HInstruction* value = instruction->GetLeft(); - if (input_cst != nullptr) { - int64_t cst = Int64FromConstant(input_cst); - int64_t mask = (input_other->GetType() == Primitive::kPrimLong) - ? kMaxLongShiftDistance - : kMaxIntShiftDistance; - if ((cst & mask) == 0) { + int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong) + ? kMaxLongShiftDistance + : kMaxIntShiftDistance; + + if (shift_amount->IsConstant()) { + int64_t cst = Int64FromConstant(shift_amount->AsConstant()); + if ((cst & implicit_mask) == 0) { // Replace code looking like - // SHL dst, src, 0 + // SHL dst, value, 0 // with - // src - instruction->ReplaceWith(input_other); + // value + instruction->ReplaceWith(value); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); + return; + } + } + + // Shift operations implicitly mask the shift amount according to the type width. Get rid of + // unnecessary explicit masking operations on the shift amount. + // Replace code looking like + // AND masked_shift, shift, <superset of implicit mask> + // SHL dst, value, masked_shift + // with + // SHL dst, value, shift + if (shift_amount->IsAnd()) { + HAnd* and_insn = shift_amount->AsAnd(); + HConstant* mask = and_insn->GetConstantRight(); + if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) { + instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1); + RecordSimplification(); } } } @@ -1685,13 +1704,32 @@ void InstructionSimplifierVisitor::SimplifyFP2Int(HInvoke* invoke) { invoke->ReplaceWithExceptInReplacementAtIndex(select, 0); // false at index 0 } +void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) { + HInstruction* str = invoke->InputAt(0); + HInstruction* index = invoke->InputAt(1); + uint32_t dex_pc = invoke->GetDexPc(); + ArenaAllocator* arena = GetGraph()->GetArena(); + // We treat String as an array to allow DCE and BCE to seamlessly work on strings, + // so create the HArrayLength, HBoundsCheck and HArrayGet. + HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true); + invoke->GetBlock()->InsertInstructionBefore(length, invoke); + HBoundsCheck* bounds_check = + new (arena) HBoundsCheck(index, length, dex_pc, invoke->GetDexMethodIndex()); + invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke); + HArrayGet* array_get = + new (arena) HArrayGet(str, index, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get); + bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment()); + GetGraph()->SetHasBoundsChecks(true); +} + void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) { HInstruction* str = invoke->InputAt(0); uint32_t dex_pc = invoke->GetDexPc(); // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength. - HArrayLength* length = new (GetGraph()->GetArena()) HArrayLength(str, dex_pc); - length->MarkAsStringLength(); + HArrayLength* length = + new (GetGraph()->GetArena()) HArrayLength(str, dex_pc, /* is_string_length */ true); HInstruction* replacement; if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) { // For String.isEmpty(), create the `HEqual` representing the `length == 0`. 
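SimplifyStringCharAt above turns the String.charAt() intrinsic into an explicit length read, a bounds check and a raw 16-bit element load, so that BCE and DCE can treat the String like a char array. A rough, self-contained model of the resulting shape (illustrative only; MockString stands in for java.lang.String and its value array):

    // Illustrative sketch only: the shape charAt() takes after simplification.
    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    struct MockString {
      std::vector<uint16_t> value;  // stands in for String.value
      int32_t Length() const { return static_cast<int32_t>(value.size()); }
    };

    uint16_t CharAt(const MockString& s, int32_t index) {
      int32_t length = s.Length();  // HArrayLength (is_string_length)
      if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(length)) {
        throw std::out_of_range("string index out of bounds");  // HBoundsCheck slow path
      }
      return s.value[index];  // HArrayGet (is_string_char_at), 16-bit load
    }

    int main() {
      MockString s{{0x61, 0x62, 0x63}};  // "abc"
      return CharAt(s, 1) == 0x62 ? 0 : 1;
    }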
@@ -1752,6 +1790,9 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kDoubleDoubleToLongBits: SimplifyFP2Int(instruction); break; + case Intrinsics::kStringCharAt: + SimplifyStringCharAt(instruction); + break; case Intrinsics::kStringIsEmpty: case Intrinsics::kStringLength: SimplifyStringIsEmptyOrLength(instruction); diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index e4a711ec83..983d31d168 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -19,6 +19,7 @@ #include "common_arm64.h" #include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace arm64 { @@ -30,7 +31,7 @@ using helpers::ShifterOperandSupportsExtension; void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, - int access_size) { + size_t data_offset) { if (kEmitCompilerReadBarrier) { // The read barrier instrumentation does not support the // HArm64IntermediateAddress instruction yet. @@ -55,8 +56,7 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio // Proceed to extract the base address computation. ArenaAllocator* arena = GetGraph()->GetArena(); - HIntConstant* offset = - GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value()); + HIntConstant* offset = GetGraph()->GetIntConstant(data_offset); HArm64IntermediateAddress* address = new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc); address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); @@ -189,17 +189,20 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { } void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { + size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), - Primitive::ComponentSize(instruction->GetType())); + data_offset); } void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { + size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); + size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); TryExtractArrayAccessAddress(instruction, instruction->GetArray(), instruction->GetIndex(), - Primitive::ComponentSize(instruction->GetComponentType())); + data_offset); } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index da269980e8..4735f85ab0 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -38,7 +38,7 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, - int access_size); + size_t data_offset); bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 83a512738b..3429a8fdbb 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -239,6 +239,7 @@ UNREACHABLE_INTRINSIC(Arch, IntegerCompare) \ UNREACHABLE_INTRINSIC(Arch, LongCompare) \ 
UNREACHABLE_INTRINSIC(Arch, IntegerSignum) \ UNREACHABLE_INTRINSIC(Arch, LongSignum) \ +UNREACHABLE_INTRINSIC(Arch, StringCharAt) \ UNREACHABLE_INTRINSIC(Arch, StringIsEmpty) \ UNREACHABLE_INTRINSIC(Arch, StringLength) \ UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence) \ diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 29f7672b0a..f43f8edf06 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -47,19 +47,6 @@ bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathARM slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, - // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -524,8 +511,8 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); if (is_volatile) { __ dmb(ISH); } @@ -581,10 +568,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -919,9 +907,10 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS below). + // implemented (see TODO in GenCAS). // - // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. 
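In the Unsafe.getObject fast path above, `offset` is already a byte offset into the object, which is why the Baker read barrier load is passed a scale factor of one (TIMES_1) instead of an element scale. A hedged sketch of the address arithmetic (illustrative only; it assumes ART's 32-bit heap references):

    // Illustrative sketch only: load a 32-bit heap reference at base + byte offset.
    #include <cstdint>
    #include <cstring>

    uint32_t RawReferenceLoad(const void* base, int64_t offset) {
      uint32_t reference;
      // No scaling of `offset`: it is a byte offset, not an array index.
      std::memcpy(&reference, static_cast<const uint8_t*>(base) + offset, sizeof(reference));
      return reference;
    }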
if (kEmitCompilerReadBarrier) { return; } @@ -932,56 +921,16 @@ void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); -} - -void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { - ArmAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Location of reference to data array - const MemberOffset value_offset = mirror::String::ValueOffset(); - // Location of count - const MemberOffset count_offset = mirror::String::CountOffset(); - - Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. - Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. - Register out = locations->Out().AsRegister<Register>(); // Result character. - - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register array_temp = locations->GetTemp(1).AsRegister<Register>(); - - // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth - // the cost. - // TODO: For simplicity, the index parameter is requested in a register, so different from Quick - // we will not optimize the code for constants (which would save a register). - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); - - __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length. - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ cmp(idx, ShifterOperand(temp)); - __ b(slow_path->GetEntryLabel(), CS); - - __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value())); // array_temp := str.value. - - // Load the value. - __ ldrh(out, Address(array_temp, idx, LSL, 1)); // out := array_temp[idx]. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); - __ Bind(slow_path->GetExitLabel()); + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { @@ -1384,6 +1333,12 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) } void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). 
+ if (kEmitCompilerReadBarrier) { + return; + } + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); LocationSummary* locations = invoke->GetLocations(); if (locations == nullptr) { @@ -1468,11 +1423,11 @@ static void CheckPosition(ArmAssembler* assembler, } } -// TODO: Implement read barriers in the SystemArrayCopy intrinsic. -// Note that this code path is not used (yet) because we do not -// intrinsify methods that can go into the IntrinsicSlowPathARM -// slow path. void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2031,7 +1986,7 @@ void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -2059,28 +2014,55 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dstObj = locations->InAt(3).AsRegister<Register>(); Register dstBegin = locations->InAt(4).AsRegister<Register>(); - Register src_ptr = locations->GetTemp(0).AsRegister<Register>(); - Register src_ptr_end = locations->GetTemp(1).AsRegister<Register>(); + Register num_chr = locations->GetTemp(0).AsRegister<Register>(); + Register src_ptr = locations->GetTemp(1).AsRegister<Register>(); Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); - Register tmp = locations->GetTemp(3).AsRegister<Register>(); // src range to copy. __ add(src_ptr, srcObj, ShifterOperand(value_offset)); - __ add(src_ptr_end, src_ptr, ShifterOperand(srcEnd, LSL, 1)); __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1)); // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); + __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); + // Do the copy. - Label loop, done; + Label loop, remainder, done; + + // Early out for valid zero-length retrievals. + __ b(&done, EQ); + + // Save repairing the value of num_chr on the < 4 character path. + __ subs(IP, num_chr, ShifterOperand(4)); + __ b(&remainder, LT); + + // Keep the result of the earlier subs, we are going to fetch at least 4 characters. + __ mov(num_chr, ShifterOperand(IP)); + + // Main loop used for longer fetches loads and stores 4x16-bit characters at a time. + // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code + // to rectify these everywhere this intrinsic applies.) 
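A portable sketch of the copy strategy emitted just below (not the ARM code itself): the main loop moves four 16-bit chars per iteration through a word-sized temporary, and a one-char-at-a-time loop handles the remainder; the ARM64 variant later in this patch applies the same idea with eight chars per iteration via LDP/STP.

    // Illustrative sketch only: 4-chars-per-iteration copy with a scalar remainder loop.
    #include <cstdint>
    #include <cstring>

    void GetCharsSketch(const uint16_t* src, uint16_t* dst, int32_t num_chr) {
      while (num_chr >= 4) {
        uint64_t chunk;
        std::memcpy(&chunk, src, sizeof(chunk));  // four 16-bit chars at once
        std::memcpy(dst, &chunk, sizeof(chunk));
        src += 4;
        dst += 4;
        num_chr -= 4;
      }
      while (num_chr > 0) {  // remainder: one 16-bit char at a time
        *dst++ = *src++;
        --num_chr;
      }
    }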
__ Bind(&loop); - __ cmp(src_ptr, ShifterOperand(src_ptr_end)); + __ ldr(IP, Address(src_ptr, char_size * 2)); + __ subs(num_chr, num_chr, ShifterOperand(4)); + __ str(IP, Address(dst_ptr, char_size * 2)); + __ ldr(IP, Address(src_ptr, char_size * 4, Address::PostIndex)); + __ str(IP, Address(dst_ptr, char_size * 4, Address::PostIndex)); + __ b(&loop, GE); + + __ adds(num_chr, num_chr, ShifterOperand(4)); __ b(&done, EQ); - __ ldrh(tmp, Address(src_ptr, char_size, Address::PostIndex)); - __ strh(tmp, Address(dst_ptr, char_size, Address::PostIndex)); - __ b(&loop); + + // Main loop for < 4 character case and remainder handling. Loads and stores one + // 16-bit Java character at a time. + __ Bind(&remainder); + __ ldrh(IP, Address(src_ptr, char_size, Address::PostIndex)); + __ subs(num_chr, num_chr, ShifterOperand(1)); + __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex)); + __ b(&remainder, GT); + __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d776fb4406..1685cf9c3c 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -149,19 +149,6 @@ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathARM64 slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -791,8 +778,15 @@ static void GenUnsafeGet(HInvoke* invoke, // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + /* offset */ 0U, + /* index */ offset_loc, + /* scale_factor */ 0U, + temp, + /* needs_null_check */ false, + is_volatile); } else { // Other cases. MemOperand mem_op(base.X(), offset); @@ -821,7 +815,8 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1102,9 +1097,10 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS below). + // implemented (see TODO in GenCAS). // - // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. 
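Several location summaries in this patch switch the intrinsic output to kOutputOverlap whenever a slow path may be called: if the output shared a register with an input, the fast path could clobber that input before the slow path reads it. A toy illustration of the hazard, with aliased variables standing in for a shared register (illustrative only, not ART code):

    // Illustrative sketch only: why an output must not alias an input when a
    // slow-path call can still read that input.
    #include <cassert>

    int SlowPath(int base) { return base + 100; }

    void FastPathThenSlowPath(int& in, int& out, bool need_slow_path) {
      out = in * 2;          // fast-path result written early
      if (need_slow_path) {
        out = SlowPath(in);  // reads `in` after it was overwritten if out aliases in
      }
    }

    int main() {
      int x = 5;
      FastPathThenSlowPath(x, x, /* need_slow_path */ true);    // aliased: in clobbered
      int y = 5;
      int out = 0;
      FastPathThenSlowPath(y, out, /* need_slow_path */ true);  // distinct: correct
      assert(x == 110 && out == 105);  // the aliased call produced the wrong value
      return 0;
    }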
+ // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. if (kEmitCompilerReadBarrier) { return; } @@ -1119,57 +1115,16 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); -} - -void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // In case we need to go in the slow path, we can't have the output be the same - // as the input: the current liveness analysis considers the input to be live - // at the point of the call. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) { - vixl::MacroAssembler* masm = GetVIXLAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - // Location of reference to data array - const MemberOffset value_offset = mirror::String::ValueOffset(); - // Location of count - const MemberOffset count_offset = mirror::String::CountOffset(); - - Register obj = WRegisterFrom(locations->InAt(0)); // String object pointer. - Register idx = WRegisterFrom(locations->InAt(1)); // Index of character. - Register out = WRegisterFrom(locations->Out()); // Result character. - - UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireW(); - Register array_temp = temps.AcquireW(); // We can trade this for worse scheduling. - - // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth - // the cost. - // TODO: For simplicity, the index parameter is requested in a register, so different from Quick - // we will not optimize the code for constants (which would save a register). - - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); - - __ Ldr(temp, HeapOperand(obj, count_offset)); // temp = str.length. - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ Cmp(idx, temp); - __ B(hs, slow_path->GetEntryLabel()); - - __ Add(array_temp, obj, Operand(value_offset.Int32Value())); // array_temp := str.value. - - // Load the value. - __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1)); // out := array_temp[idx]. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. 
+ DCHECK(!kEmitCompilerReadBarrier); - __ Bind(slow_path->GetExitLabel()); + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { @@ -1745,6 +1700,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1770,29 +1726,57 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dstBegin = XRegisterFrom(locations->InAt(4)); Register src_ptr = XRegisterFrom(locations->GetTemp(0)); - Register src_ptr_end = XRegisterFrom(locations->GetTemp(1)); + Register num_chr = XRegisterFrom(locations->GetTemp(1)); + Register tmp1 = XRegisterFrom(locations->GetTemp(2)); UseScratchRegisterScope temps(masm); Register dst_ptr = temps.AcquireX(); - Register tmp = temps.AcquireW(); + Register tmp2 = temps.AcquireX(); - // src range to copy. + // src address to copy from. __ Add(src_ptr, srcObj, Operand(value_offset)); - __ Add(src_ptr_end, src_ptr, Operand(srcEnd, LSL, 1)); __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); - // dst to be copied. + // dst address start to copy to. __ Add(dst_ptr, dstObj, Operand(data_offset)); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); + __ Sub(num_chr, srcEnd, srcBegin); + // Do the copy. - vixl::Label loop, done; + vixl::Label loop; + vixl::Label done; + vixl::Label remainder; + + // Early out for valid zero-length retrievals. + __ Cbz(num_chr, &done); + + // Save repairing the value of num_chr on the < 8 character path. + __ Subs(tmp1, num_chr, 8); + __ B(lt, &remainder); + + // Keep the result of the earlier subs, we are going to fetch at least 8 characters. + __ Mov(num_chr, tmp1); + + // Main loop used for longer fetches loads and stores 8x16-bit characters at a time. + // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.) __ Bind(&loop); - __ Cmp(src_ptr, src_ptr_end); - __ B(&done, eq); - __ Ldrh(tmp, MemOperand(src_ptr, char_size, vixl::PostIndex)); - __ Strh(tmp, MemOperand(dst_ptr, char_size, vixl::PostIndex)); - __ B(&loop); + __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, vixl::PostIndex)); + __ Subs(num_chr, num_chr, 8); + __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, vixl::PostIndex)); + __ B(ge, &loop); + + __ Adds(num_chr, num_chr, 8); + __ B(eq, &done); + + // Main loop for < 8 character case and remainder handling. Loads and stores one + // 16-bit Java character at a time. + __ Bind(&remainder); + __ Ldrh(tmp1, MemOperand(src_ptr, char_size, vixl::PostIndex)); + __ Subs(num_chr, num_chr, 1); + __ Strh(tmp1, MemOperand(dst_ptr, char_size, vixl::PostIndex)); + __ B(gt, &remainder); + __ Bind(&done); } @@ -2033,6 +2017,12 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; // We want to use two temporary registers in order to reduce the register pressure in arm64. // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). 
+ if (kEmitCompilerReadBarrier) { + return; + } + // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); @@ -2085,6 +2075,10 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + vixl::MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 140f56a870..d4f44d63e2 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1872,54 +1872,6 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } -// char java.lang.String.charAt(int index) -void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The inputs will be considered live at the last instruction and restored. This would overwrite - // the output with kNoOutputOverlap. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - MipsAssembler* assembler = GetAssembler(); - - // Location of reference to data array - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count - const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register idx = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - // TODO: Maybe we can support range check elimination. Overall, - // though, I think it's not worth the cost. - // TODO: For simplicity, the index parameter is requested in a - // register, so different from Quick we will not optimize the - // code for constants (which would save a register). - - SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); - codegen_->AddSlowPath(slow_path); - - // Load the string size - __ Lw(TMP, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(invoke); - // Revert to slow path if idx is too large, or negative - __ Bgeu(idx, TMP, slow_path->GetEntryLabel()); - - // out = obj[2*idx]. 
- __ Sll(TMP, idx, 1); // idx * 2 - __ Addu(TMP, TMP, obj); // Address of char at location idx - __ Lhu(out, TMP, value_offset); // Load char at location idx - - __ Bind(slow_path->GetExitLabel()); -} - // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 6c4e64e4b1..9243f4c93f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1371,52 +1371,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } -// char java.lang.String.charAt(int index) -void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - Mips64Assembler* assembler = GetAssembler(); - - // Location of reference to data array - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count - const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - // TODO: Maybe we can support range check elimination. Overall, - // though, I think it's not worth the cost. - // TODO: For simplicity, the index parameter is requested in a - // register, so different from Quick we will not optimize the - // code for constants (which would save a register). - - SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load the string size - __ Lw(TMP, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(invoke); - // Revert to slow path if idx is too large, or negative - __ Bgeuc(idx, TMP, slow_path->GetEntryLabel()); - - // out = obj[2*idx]. - __ Sll(TMP, idx, 1); // idx * 2 - __ Daddu(TMP, TMP, obj); // Address of char at location idx - __ Lhu(out, TMP, value_offset); // Load char at location idx - - __ Bind(slow_path->GetExitLabel()); -} - // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 05377f984b..031cd1313c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -60,19 +60,6 @@ bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathX86 slow path. 
Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -1030,48 +1017,6 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickNextAfter); } -void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - - // Location of reference to data array. - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count. - const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register idx = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth - // the cost. - // TODO: For simplicity, the index parameter is requested in a register, so different from Quick - // we will not optimize the code for constants (which would save a register). - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); - - X86Assembler* assembler = GetAssembler(); - - __ cmpl(idx, Address(obj, count_offset)); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ j(kAboveEqual, slow_path->GetEntryLabel()); - - // out = out[2*idx]. - __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); - - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { // We need at least two of the positions or length to be an integer constant, // or else we won't have enough free registers. @@ -1864,8 +1809,9 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + Address src(base, offset, ScaleFactor::TIMES_1, 0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, output_loc, base, src, temp, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1920,16 +1866,17 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, if (is_volatile) { // Need to use XMM to read volatile. 
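The "Need to use XMM to read volatile" comment just above refers to the fact that a volatile 64-bit read on 32-bit x86 has to be a single wide load; two separate 32-bit loads could observe a torn value. A hedged C++ analogue of the guarantee the intrinsic reproduces (std::atomic typically compiles to one 8-byte access on this target):

    // Illustrative sketch only: a 64-bit value read in one shot, never torn.
    #include <atomic>
    #include <cstdint>

    int64_t ReadVolatile64(const std::atomic<int64_t>& field) {
      return field.load(std::memory_order_seq_cst);
    }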
locations->AddTemp(Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } else { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } } else { - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -2151,9 +2098,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented. + // implemented (see TODO in GenCAS). // - // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // TODO(rpl): Implement read barrier support in GenCAS and re-enable // this intrinsic. if (kEmitCompilerReadBarrier) { return; @@ -2278,6 +2225,15 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 67c2f3a866..c5b44d4f5c 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -54,19 +54,6 @@ bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathX86_64 slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -891,49 +878,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickNextAfter); } -void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. 
- LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { - LocationSummary* locations = invoke->GetLocations(); - - // Location of reference to data array. - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count. - const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth - // the cost. - // TODO: For simplicity, the index parameter is requested in a register, so different from Quick - // we will not optimize the code for constants (which would save a register). - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); - - X86_64Assembler* assembler = GetAssembler(); - - __ cmpl(idx, Address(obj, count_offset)); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ j(kAboveEqual, slow_path->GetEntryLabel()); - - // out = out[2*idx]. - __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); - - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. @@ -1122,14 +1066,20 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + if (kEmitCompilerReadBarrier) { + return; + } + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } -// TODO: Implement read barriers in the SystemArrayCopy intrinsic. -// Note that this code path is not used (yet) because we do not -// intrinsify methods that can go into the IntrinsicSlowPathX86_64 -// slow path. void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1953,8 +1903,9 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + Address src(base, offset, ScaleFactor::TIMES_1, 0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, output_loc, base, src, temp, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1991,10 +1942,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -2178,9 +2130,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented. + // implemented (see TODO in GenCAS). // - // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // TODO(rpl): Implement read barrier support in GenCAS and re-enable // this intrinsic. if (kEmitCompilerReadBarrier) { return; @@ -2296,6 +2248,15 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCAS(Primitive::kPrimNot, invoke, codegen_); } @@ -2484,7 +2445,7 @@ static void GenOneBit(X86_64Assembler* assembler, : CTZ(static_cast<uint32_t>(value)); } if (is_long) { - codegen->Load64BitValue(out, 1L << value); + codegen->Load64BitValue(out, 1ULL << value); } else { codegen->Load32BitValue(out, 1 << value); } diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 8a75a90cfd..7347686830 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -65,6 +65,16 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { is_singleton_and_not_returned_ = false; return; } + if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) || + (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) { + // The field is accessed in an unresolved way. We mark the object as a singleton to + // disable load/store optimizations on it. + // Note that we could optimize this case and still perform some optimizations until + // we hit the unresolved access, but disabling is the simplest. 
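As the comment above explains, once a reference escapes into an unresolved field access the pass simply stops treating the object as a singleton (the flags are cleared just below). A small C++ analogy of why its loads and stores must then be kept; OpaqueAccess is a stand-in for the unresolved access, and this is not ART code:

    // Illustrative sketch only: an opaque access forces stores to stay and loads
    // to be re-done.
    #include <cassert>

    struct Point { int x; };

    // Pretend this is resolved only at runtime; the optimizer must assume it may
    // read or write any field of *p.
    void OpaqueAccess(Point* p) { p->x += 1; }

    int Demo() {
      Point p{0};
      p.x = 1;           // cannot be removed as a dead store...
      OpaqueAccess(&p);  // ...because this access may observe or overwrite it,
      return p.x;        // ...and this load cannot be forwarded from the store above.
    }

    int main() {
      assert(Demo() == 2);
      return 0;
    }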
+ is_singleton_ = false; + is_singleton_and_not_returned_ = false; + return; + } if (user->IsReturn()) { is_singleton_and_not_returned_ = false; } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 63bbc2cd0a..3f27c911be 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -38,7 +38,13 @@ std::ostream& operator<<(std::ostream& os, const Location& location); class Location : public ValueObject { public: enum OutputOverlap { + // The liveness of the output overlaps the liveness of one or + // several input(s); the register allocator cannot reuse an + // input's location for the output's location. kOutputOverlap, + // The liveness of the output does not overlap the liveness of any + // input; the register allocator is allowed to reuse an input's + // location for the output's location. kNoOutputOverlap }; @@ -494,6 +500,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return inputs_.size(); } + // Set the output location. Argument `overlaps` tells whether the + // output overlaps any of the inputs (if so, it cannot share the + // same register as one of the inputs); it is set to + // `Location::kOutputOverlap` by default for safety. void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { DCHECK(output_.IsInvalid()); output_overlaps_ = overlaps; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 4b4e549e20..c2c212b66f 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2430,8 +2430,69 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckReq } } +bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { + const HLoadClass* other_load_class = other->AsLoadClass(); + // TODO: To allow GVN for HLoadClass from different dex files, we should compare the type + // names rather than type indexes. However, we shall also have to re-think the hash code. + if (type_index_ != other_load_class->type_index_ || + GetPackedFields() != other_load_class->GetPackedFields()) { + return false; + } + LoadKind load_kind = GetLoadKind(); + if (HasAddress(load_kind)) { + return GetAddress() == other_load_class->GetAddress(); + } else if (HasTypeReference(load_kind)) { + return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile()); + } else { + DCHECK(HasDexCacheReference(load_kind)) << load_kind; + // If the type indexes and dex files are the same, dex cache element offsets + // must also be the same, so we don't need to compare them. + return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile()); + } +} + +void HLoadClass::SetLoadKindInternal(LoadKind load_kind) { + // Once sharpened, the load kind should not be changed again. + // Also, kReferrersClass should never be overwritten. 
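HLoadClass::InstructionDataEquals above keys GVN equality on the load kind: address-based kinds compare the absolute address, the other kinds compare the (dex file, type index) pair, with the dex file compared by identity. A rough analogue, illustrative only and reduced to two kinds:

    // Illustrative sketch only: per-kind equality for a class load.
    #include <cstdint>
    #include <string>

    enum class LoadKind { kBootImageAddress, kDexCachePcRelative };

    struct MockLoadClass {
      LoadKind kind;
      uint32_t type_index;
      uint64_t address;             // meaningful for the address-based kind
      const std::string* dex_file;  // compared by identity, as in IsSameDexFile
    };

    bool DataEquals(const MockLoadClass& a, const MockLoadClass& b) {
      if (a.kind != b.kind || a.type_index != b.type_index) return false;
      return (a.kind == LoadKind::kBootImageAddress) ? a.address == b.address
                                                     : a.dex_file == b.dex_file;
    }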
+ DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod); + SetPackedField<LoadKindField>(load_kind); + + if (load_kind != LoadKind::kDexCacheViaMethod) { + RemoveAsUserOfInput(0u); + SetRawInputAt(0u, nullptr); + } + if (!NeedsEnvironment()) { + RemoveEnvironment(); + SetSideEffects(SideEffects::None()); + } +} + +std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { + switch (rhs) { + case HLoadClass::LoadKind::kReferrersClass: + return os << "ReferrersClass"; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + return os << "BootImageLinkTimeAddress"; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + return os << "BootImageLinkTimePcRelative"; + case HLoadClass::LoadKind::kBootImageAddress: + return os << "BootImageAddress"; + case HLoadClass::LoadKind::kDexCacheAddress: + return os << "DexCacheAddress"; + case HLoadClass::LoadKind::kDexCachePcRelative: + return os << "DexCachePcRelative"; + case HLoadClass::LoadKind::kDexCacheViaMethod: + return os << "DexCacheViaMethod"; + default: + LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + bool HLoadString::InstructionDataEquals(const HInstruction* other) const { const HLoadString* other_load_string = other->AsLoadString(); + // TODO: To allow GVN for HLoadString from different dex files, we should compare the strings + // rather than their indexes. However, we shall also have to re-think the hash code. if (string_index_ != other_load_string->string_index_ || GetPackedFields() != other_load_string->GetPackedFields()) { return false; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 711a6c1b2d..29df7c8ab8 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,6 +26,7 @@ #include "base/arena_object.h" #include "base/stl_util.h" #include "dex/compiler_enums.h" +#include "dex_file.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" @@ -85,6 +86,16 @@ static constexpr InvokeType kInvalidInvokeType = static_cast<InvokeType>(-1); static constexpr uint32_t kNoDexPc = -1; +inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { + // For the purposes of the compiler, the dex files must actually be the same object + // if we want to safely treat them as the same. This is especially important for JIT + // as custom class loaders can open the same underlying file (or memory) multiple + // times and provide different class resolution but no two class loaders should ever + // use the same DexFile object - doing so is an unsupported hack that can lead to + // all sorts of weird failures. + return &lhs == &rhs; +} + enum IfCondition { // All types. 
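IsSameDexFile, moved into nodes.h above, is deliberately pointer identity rather than a comparison of contents or location. A small stand-alone sketch of the behaviour this gives (FakeDexFile is a stand-in type, not the real art::DexFile):

#include <cassert>
#include <string>

// Stand-in for art::DexFile; only the location matters for this sketch.
struct FakeDexFile {
  std::string location;
};

// Mirrors the pointer-identity comparison: two separately opened copies of
// the same underlying file are *not* considered the same dex file.
bool IsSameFakeDexFile(const FakeDexFile& lhs, const FakeDexFile& rhs) {
  return &lhs == &rhs;
}

int main() {
  FakeDexFile a{"core-oj.jar:classes.dex"};
  FakeDexFile b{"core-oj.jar:classes.dex"};  // same location, different object
  assert(IsSameFakeDexFile(a, a));
  assert(!IsSameFakeDexFile(a, b));
  return 0;
}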
kCondEQ, // == @@ -161,6 +172,10 @@ class ReferenceTypeInfo : ValueObject { static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact); + static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) { + return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes()); + } + static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) { return ReferenceTypeInfo(type_handle, is_exact); } @@ -1920,6 +1935,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { environment_ = environment; } + void InsertRawEnvironment(HEnvironment* environment) { + DCHECK(environment_ != nullptr); + DCHECK_EQ(environment->GetHolder(), this); + DCHECK(environment->GetParent() == nullptr); + environment->parent_ = environment_; + environment_ = environment; + } + void RemoveEnvironment(); // Set the environment of this instruction, copying it from `environment`. While @@ -5079,8 +5102,13 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { class HArrayGet FINAL : public HExpression<2> { public: - HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type, uint32_t dex_pc) + HArrayGet(HInstruction* array, + HInstruction* index, + Primitive::Type type, + uint32_t dex_pc, + bool is_string_char_at = false) : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) { + SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, array); SetRawInputAt(1, index); } @@ -5114,12 +5142,24 @@ class HArrayGet FINAL : public HExpression<2> { return result; } + bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); } + HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } DECLARE_INSTRUCTION(ArrayGet); private: + // We treat a String as an array, creating the HArrayGet from String.charAt() + // intrinsic in the instruction simplifier. We can always determine whether + // a particular HArrayGet is actually a String.charAt() by looking at the type + // of the input but that requires holding the mutator lock, so we prefer to use + // a flag, so that code generators don't need to do the locking. + static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; + static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1; + static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HArrayGet); }; @@ -5225,8 +5265,9 @@ class HArraySet FINAL : public HTemplateInstruction<3> { class HArrayLength FINAL : public HExpression<1> { public: - HArrayLength(HInstruction* array, uint32_t dex_pc) + HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + SetPackedFlag<kFlagIsStringLength>(is_string_length); // Note that arrays do not change length, so the instruction does not // depend on any write. SetRawInputAt(0, array); @@ -5240,7 +5281,6 @@ class HArrayLength FINAL : public HExpression<1> { return obj == InputAt(0); } - void MarkAsStringLength() { SetPackedFlag<kFlagIsStringLength>(); } bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); } DECLARE_INSTRUCTION(ArrayLength); @@ -5263,8 +5303,12 @@ class HBoundsCheck FINAL : public HExpression<2> { public: // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` // constructor. 
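The HArrayGet change above stores the new is_string_char_at bit in the instruction's packed field storage, with a static_assert guarding the overall bit budget. A reduced sketch of that packing scheme (simplified names, not the real HInstruction helpers):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Simplified packed-flags idea: flags live in one 32-bit word and each class
// reserves its bit indexes after those of its base class.
class PackedBits {
 public:
  static constexpr size_t kMaxNumberOfPackedBits = 32;

  void SetFlag(size_t bit, bool value) {
    assert(bit < kMaxNumberOfPackedBits);
    if (value) { bits_ |= (1u << bit); } else { bits_ &= ~(1u << bit); }
  }
  bool GetFlag(size_t bit) const { return (bits_ & (1u << bit)) != 0u; }

 private:
  uint32_t bits_ = 0u;
};

// Pretend the "expression" base class already used bits [0, 3).
constexpr size_t kNumberOfExpressionPackedBits = 3;
constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1;
static_assert(kNumberOfArrayGetPackedBits <= PackedBits::kMaxNumberOfPackedBits,
              "Too many packed fields.");

int main() {
  PackedBits bits;
  bits.SetFlag(kFlagIsStringCharAt, true);  // HArrayGet created for String.charAt()
  assert(bits.GetFlag(kFlagIsStringCharAt));
  return 0;
}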
- HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) - : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + HBoundsCheck(HInstruction* index, + HInstruction* length, + uint32_t dex_pc, + uint32_t string_char_at_method_index = DexFile::kDexNoIndex) + : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc), + string_char_at_method_index_(string_char_at_method_index) { DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType())); SetRawInputAt(0, index); SetRawInputAt(1, length); @@ -5279,11 +5323,23 @@ class HBoundsCheck FINAL : public HExpression<2> { bool CanThrow() const OVERRIDE { return true; } + bool IsStringCharAt() const { return GetStringCharAtMethodIndex() != DexFile::kDexNoIndex; } + uint32_t GetStringCharAtMethodIndex() const { return string_char_at_method_index_; } + HInstruction* GetIndex() const { return InputAt(0); } DECLARE_INSTRUCTION(BoundsCheck); private: + // We treat a String as an array, creating the HBoundsCheck from String.charAt() + // intrinsic in the instruction simplifier. We want to include the String.charAt() + // in the stack trace if we actually throw the StringIndexOutOfBoundsException, + // so we need to create an HEnvironment which will be translated to an InlineInfo + // indicating the extra stack frame. Since we add this HEnvironment quite late, + // in the PrepareForRegisterAllocation pass, we need to remember the method index + // from the invoke as we don't want to look again at the dex bytecode. + uint32_t string_char_at_method_index_; // DexFile::kDexNoIndex if regular array. + DISALLOW_COPY_AND_ASSIGN(HBoundsCheck); }; @@ -5329,8 +5385,44 @@ class HNativeDebugInfo : public HTemplateInstruction<0> { /** * Instruction to load a Class object. */ -class HLoadClass FINAL : public HExpression<1> { +class HLoadClass FINAL : public HInstruction { public: + // Determines how to load the Class. + enum class LoadKind { + // Use the Class* from the method's own ArtMethod*. + kReferrersClass, + + // Use boot image Class* address that will be known at link time. + // Used for boot image classes referenced by boot image code in non-PIC mode. + kBootImageLinkTimeAddress, + + // Use PC-relative boot image Class* address that will be known at link time. + // Used for boot image classes referenced by boot image code in PIC mode. + kBootImageLinkTimePcRelative, + + // Use a known boot image Class* address, embedded in the code by the codegen. + // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. + // Note: codegen needs to emit a linker patch if indicated by compiler options' + // GetIncludePatchInformation(). + kBootImageAddress, + + // Load from the resolved types array at an absolute address. + // Used for classes outside the boot image referenced by JIT-compiled code. + kDexCacheAddress, + + // Load from resolved types array in the dex cache using a PC-relative load. + // Used for classes outside boot image when we know that we can access + // the dex cache arrays using a PC-relative load. + kDexCachePcRelative, + + // Load from resolved types array accessed through the class loaded from + // the compiled method's own ArtMethod*. This is the default access type when + // all other types are unavailable. 
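The HBoundsCheck change above distinguishes a String.charAt() range check from a regular array check purely by whether a method index was recorded, with DexFile::kDexNoIndex acting as the "no value" sentinel. A minimal sketch of the same pattern (stand-in constant and class, not the real HBoundsCheck):

#include <cassert>
#include <cstdint>

constexpr uint32_t kNoIndex = 0xFFFFFFFFu;  // stand-in for DexFile::kDexNoIndex

class RangeCheck {
 public:
  explicit RangeCheck(uint32_t string_char_at_method_index = kNoIndex)
      : string_char_at_method_index_(string_char_at_method_index) {}

  // A recorded method index means this check was created for String.charAt().
  bool IsStringCharAt() const { return string_char_at_method_index_ != kNoIndex; }
  uint32_t GetStringCharAtMethodIndex() const { return string_char_at_method_index_; }

 private:
  uint32_t string_char_at_method_index_;
};

int main() {
  RangeCheck regular_array_check;
  RangeCheck char_at_check(/* hypothetical method index of String.charAt(int) */ 1234u);
  assert(!regular_array_check.IsStringCharAt());
  assert(char_at_check.IsStringCharAt());
  return 0;
}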
+ kDexCacheViaMethod, + + kLast = kDexCacheViaMethod + }; + HLoadClass(HCurrentMethod* current_method, uint16_t type_index, const DexFile& dex_file, @@ -5338,7 +5430,8 @@ class HLoadClass FINAL : public HExpression<1> { uint32_t dex_pc, bool needs_access_check, bool is_in_dex_cache) - : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { @@ -5346,26 +5439,47 @@ class HLoadClass FINAL : public HExpression<1> { // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); - SetPackedFlag<kFlagIsReferrersClass>(is_referrers_class); + SetPackedField<LoadKindField>( + is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache); SetPackedFlag<kFlagGenerateClInitCheck>(false); - SetRawInputAt(0, current_method); } - bool CanBeMoved() const OVERRIDE { return true; } + void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) { + DCHECK(HasAddress(load_kind)); + load_data_.address = address; + SetLoadKindInternal(load_kind); + } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - // Note that we don't need to test for generate_clinit_check_. - // Whether or not we need to generate the clinit check is processed in - // prepare_for_register_allocator based on existing HInvokes and HClinitChecks. - return other->AsLoadClass()->type_index_ == type_index_ && - other->AsLoadClass()->GetPackedFields() == GetPackedFields(); + void SetLoadKindWithTypeReference(LoadKind load_kind, + const DexFile& dex_file, + uint32_t type_index) { + DCHECK(HasTypeReference(load_kind)); + DCHECK(IsSameDexFile(dex_file_, dex_file)); + DCHECK_EQ(type_index_, type_index); + SetLoadKindInternal(load_kind); + } + + void SetLoadKindWithDexCacheReference(LoadKind load_kind, + const DexFile& dex_file, + uint32_t element_index) { + DCHECK(HasDexCacheReference(load_kind)); + DCHECK(IsSameDexFile(dex_file_, dex_file)); + load_data_.dex_cache_element_index = element_index; + SetLoadKindInternal(load_kind); } + LoadKind GetLoadKind() const { + return GetPackedField<LoadKindField>(); + } + + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other) const; + size_t ComputeHashCode() const OVERRIDE { return type_index_; } - uint16_t GetTypeIndex() const { return type_index_; } bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { @@ -5400,7 +5514,15 @@ class HLoadClass FINAL : public HExpression<1> { loaded_class_rti_ = rti; } - const DexFile& GetDexFile() { return dex_file_; } + uint32_t GetTypeIndex() const { return type_index_; } + const DexFile& GetDexFile() const { return dex_file_; } + + uint32_t GetDexCacheElementOffset() const; + + uint64_t GetAddress() const { + DCHECK(HasAddress(GetLoadKind())); + return load_data_.address; + } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !IsReferrersClass(); } @@ -5408,30 +5530,96 @@ class HLoadClass FINAL : public HExpression<1> { return SideEffects::CanTriggerGC(); } - bool IsReferrersClass() const { return GetPackedFlag<kFlagIsReferrersClass>(); } + bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; } bool 
NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); } bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); } bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); } + void MarkInDexCache() { + SetPackedFlag<kFlagIsInDexCache>(true); + DCHECK(!NeedsEnvironment()); + RemoveEnvironment(); + SetSideEffects(SideEffects::None()); + } + + void AddSpecialInput(HInstruction* special_input); + + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + return ArrayRef<HUserRecord<HInstruction*>>( + &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); + } + + Primitive::Type GetType() const OVERRIDE { + return Primitive::kPrimNot; + } + DECLARE_INSTRUCTION(LoadClass); private: - static constexpr size_t kFlagIsReferrersClass = kNumberOfExpressionPackedBits; - static constexpr size_t kFlagNeedsAccessCheck = kFlagIsReferrersClass + 1; + static constexpr size_t kFlagNeedsAccessCheck = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsInDexCache = kFlagNeedsAccessCheck + 1; // Whether this instruction must generate the initialization check. // Used for code generation. static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1; - static constexpr size_t kNumberOfLoadClassPackedBits = kFlagGenerateClInitCheck + 1; + static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; + static constexpr size_t kFieldLoadKindSize = + MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); + static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); + using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; + + static bool HasTypeReference(LoadKind load_kind) { + return load_kind == LoadKind::kBootImageLinkTimeAddress || + load_kind == LoadKind::kBootImageLinkTimePcRelative || + load_kind == LoadKind::kDexCacheViaMethod || + load_kind == LoadKind::kReferrersClass; + } + + static bool HasAddress(LoadKind load_kind) { + return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress; + } + + static bool HasDexCacheReference(LoadKind load_kind) { + return load_kind == LoadKind::kDexCachePcRelative; + } + + void SetLoadKindInternal(LoadKind load_kind); + + // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass. + // For other load kinds it's empty or possibly some architecture-specific instruction + // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative. + HUserRecord<HInstruction*> special_input_; const uint16_t type_index_; const DexFile& dex_file_; + union { + uint32_t dex_cache_element_index; // Only for dex cache reference. + uint64_t address; // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets. + } load_data_; + ReferenceTypeInfo loaded_class_rti_; DISALLOW_COPY_AND_ASSIGN(HLoadClass); }; +std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); + +// Note: defined outside class to see operator<<(., HLoadClass::LoadKind). +inline uint32_t HLoadClass::GetDexCacheElementOffset() const { + DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind(); + return load_data_.dex_cache_element_index; +} + +// Note: defined outside class to see operator<<(., HLoadClass::LoadKind). 
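HLoadClass now keeps either an absolute address or a dex cache element index in a single union, and the accessors above assert that the current load kind matches the member being read. A compact sketch of that tagged-union discipline (only two payload kinds, simplified names):

#include <cassert>
#include <cstdint>

enum class PayloadKind { kAddress, kDexCacheElement };

class LoadPayload {
 public:
  static LoadPayload ForAddress(uint64_t address) {
    LoadPayload p(PayloadKind::kAddress);
    p.data_.address = address;
    return p;
  }
  static LoadPayload ForDexCacheElement(uint32_t index) {
    LoadPayload p(PayloadKind::kDexCacheElement);
    p.data_.dex_cache_element_index = index;
    return p;
  }

  // Accessors check the kind, mirroring the DCHECKs in GetAddress() and
  // GetDexCacheElementOffset().
  uint64_t GetAddress() const {
    assert(kind_ == PayloadKind::kAddress);
    return data_.address;
  }
  uint32_t GetDexCacheElementIndex() const {
    assert(kind_ == PayloadKind::kDexCacheElement);
    return data_.dex_cache_element_index;
  }

 private:
  explicit LoadPayload(PayloadKind kind) : kind_(kind) {}
  PayloadKind kind_;
  union {
    uint32_t dex_cache_element_index;  // only for dex cache reference kinds
    uint64_t address;                  // up to 64-bit for address-based kinds
  } data_;
};

int main() {
  LoadPayload a = LoadPayload::ForAddress(0x70001000u);
  LoadPayload b = LoadPayload::ForDexCacheElement(42u);
  assert(a.GetAddress() == 0x70001000u);
  assert(b.GetDexCacheElementIndex() == 42u);
  return 0;
}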
+inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { + // The special input is used for PC-relative loads on some architectures. + DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind(); + DCHECK(special_input_.GetInstruction() == nullptr); + special_input_ = HUserRecord<HInstruction*>(special_input); + special_input->AddUseAt(this, 0); +} class HLoadString FINAL : public HInstruction { public: @@ -5599,6 +5787,9 @@ class HLoadString FINAL : public HInstruction { void SetLoadKindInternal(LoadKind load_kind); + // The special input is the HCurrentMethod for kDexCacheViaMethod. + // For other load kinds it's empty or possibly some architecture-specific instruction + // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; // String index serves also as the hash code and it's also needed for slow-paths, @@ -6572,16 +6763,6 @@ inline int64_t Int64FromConstant(HConstant* constant) { } } -inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { - // For the purposes of the compiler, the dex files must actually be the same object - // if we want to safely treat them as the same. This is especially important for JIT - // as custom class loaders can open the same underlying file (or memory) multiple - // times and provide different class resolution but no two class loaders should ever - // use the same DexFile object - doing so is an unsupported hack that can lead to - // all sorts of weird failures. - return &lhs == &rhs; -} - #define INSTRUCTION_TYPE_CHECK(type, super) \ inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ inline const H##type* HInstruction::As##type() const { \ diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index cb2fc0a19a..93116f8bab 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -80,6 +80,15 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HandleInvoke(invoke); } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadClass::LoadKind::kDexCachePcRelative) { + InitializePCRelativeBasePointer(); + load_class->AddSpecialInput(base_); + } + } + void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index c941c0c086..696b8c6859 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -40,6 +40,22 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); + if (check->IsStringCharAt()) { + // Add a fake environment for String.charAt() inline info as we want + // the exception to appear as being thrown from there. 
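The fake environment mentioned above works by prepending a new innermost frame to the instruction's environment chain; InsertRawEnvironment earlier in this change makes the new environment current and hangs the previous chain off its parent pointer. A stripped-down sketch of that chaining, assuming the innermost frame is the one a throw is attributed to, as the comment describes (stand-in types, not HEnvironment):

#include <cassert>
#include <cstdint>

// Stand-in for HEnvironment: just enough to show the parent chain.
struct Frame {
  uint32_t method_index;
  Frame* parent;
};

class Insn {
 public:
  // Simplified version of HInstruction::InsertRawEnvironment: the new frame
  // becomes the innermost one; the previous chain becomes its parent.
  void InsertRawEnvironment(Frame* frame) {
    assert(environment_ != nullptr);
    assert(frame->parent == nullptr);
    frame->parent = environment_;
    environment_ = frame;
  }
  void SetEnvironment(Frame* frame) { environment_ = frame; }
  Frame* GetEnvironment() const { return environment_; }

 private:
  Frame* environment_ = nullptr;
};

int main() {
  Frame caller{/* method_index */ 7u, nullptr};
  Frame char_at{/* hypothetical index of String.charAt(int) */ 1234u, nullptr};
  Insn bounds_check;
  bounds_check.SetEnvironment(&caller);
  bounds_check.InsertRawEnvironment(&char_at);
  // The innermost frame now names String.charAt(); the original frame is its caller.
  assert(bounds_check.GetEnvironment() == &char_at);
  assert(bounds_check.GetEnvironment()->parent == &caller);
  return 0;
}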
+ const DexFile& dex_file = check->GetEnvironment()->GetDexFile(); + DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(), + "char java.lang.String.charAt(int)"); + ArenaAllocator* arena = GetGraph()->GetArena(); + HEnvironment* environment = new (arena) HEnvironment(arena, + /* number_of_vregs */ 0u, + dex_file, + check->GetStringCharAtMethodIndex(), + /* dex_pc */ DexFile::kDexNoIndex, + kVirtual, + check); + check->InsertRawEnvironment(environment); + } } void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 3e6adcb172..3dfd7282cd 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -46,13 +46,6 @@ static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollec return *cache; } -// Returns true if klass is admissible to the propagation: non-null and resolved. -// For an array type, we also check if the component type is admissible. -static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { - return klass != nullptr && klass->IsResolved() && - (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); -} - ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() { return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_); } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 2106be6b53..edd83bf5de 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -42,6 +42,14 @@ class ReferenceTypePropagation : public HOptimization { void Run() OVERRIDE; + // Returns true if klass is admissible to the propagation: non-null and resolved. + // For an array type, we also check if the component type is admissible. + static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { + return klass != nullptr && + klass->IsResolved() && + (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); + } + static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 08bd35f14a..97f34e6c32 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -40,13 +40,14 @@ void HSharpening::Run() { HInstruction* instruction = it.Current(); if (instruction->IsInvokeStaticOrDirect()) { ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect()); + } else if (instruction->IsLoadClass()) { + ProcessLoadClass(instruction->AsLoadClass()); } else if (instruction->IsLoadString()) { ProcessLoadString(instruction->AsLoadString()); } // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder // here. Rewrite it to avoid the CompilerDriver's reliance on verifier data // because we know the type better when inlining. - // TODO: HLoadClass - select better load kind if available. } } } @@ -153,6 +154,123 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { invoke->SetDispatchInfo(dispatch_info); } +void HSharpening::ProcessLoadClass(HLoadClass* load_class) { + if (load_class->NeedsAccessCheck()) { + // We need to call the runtime anyway, so we simply get the class as that call's return value. 
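IsAdmissible, now exposed on ReferenceTypePropagation above, recurses through array component types, so an array class is only admissible when its element class is. A small stand-alone version of the same check (toy class model, not mirror::Class):

#include <cassert>

// Toy stand-in for mirror::Class.
struct Klass {
  bool resolved;
  const Klass* component_type;  // non-null means this is an array class

  bool IsResolved() const { return resolved; }
  bool IsArrayClass() const { return component_type != nullptr; }
  const Klass* GetComponentType() const { return component_type; }
};

// Same shape as ReferenceTypePropagation::IsAdmissible: non-null, resolved,
// and (for arrays) an admissible component type all the way down.
bool IsAdmissible(const Klass* klass) {
  return klass != nullptr &&
         klass->IsResolved() &&
         (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
}

int main() {
  Klass string_class{true, nullptr};        // resolved, non-array
  Klass string_array{true, &string_class};  // e.g. String[]
  Klass unresolved{false, nullptr};
  Klass bad_array{true, &unresolved};       // array of an unresolved class
  assert(IsAdmissible(&string_array));
  assert(!IsAdmissible(&bad_array));
  assert(!IsAdmissible(nullptr));
  return 0;
}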
+ return; + } + if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) { + // Loading from the ArtMethod* is the most efficient retrieval. + // TODO: This may not actually be true for all architectures and + // locations of target classes. The additional register pressure + // for using the ArtMethod* should be considered. + return; + } + + DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening."; + + const DexFile& dex_file = load_class->GetDexFile(); + uint32_t type_index = load_class->GetTypeIndex(); + + bool is_in_dex_cache = false; + HLoadClass::LoadKind desired_load_kind; + uint64_t address = 0u; // Class or dex cache element address. + { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Runtime* runtime = Runtime::Current(); + ClassLinker* class_linker = runtime->GetClassLinker(); + Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile()) + ? compilation_unit_.GetDexCache() + : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); + mirror::Class* klass = dex_cache->GetResolvedType(type_index); + + if (compiler_driver_->IsBootImage()) { + // Compiling boot image. Check if the class is a boot image class. + DCHECK(!runtime->UseJitCompilation()); + if (!compiler_driver_->GetSupportBootImageFixup()) { + // MIPS/MIPS64 or compiler_driver_test. Do not sharpen. + desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } else { + if (klass != nullptr && + compiler_driver_->IsImageClass( + dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { + is_in_dex_cache = true; + desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() + ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative + : HLoadClass::LoadKind::kBootImageLinkTimeAddress; + } else { + // Not a boot image class. We must go through the dex cache. + DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); + desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative; + } + } + } else if (runtime->UseJitCompilation()) { + // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. + // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); + is_in_dex_cache = (klass != nullptr); + if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) { + // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 + desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + address = reinterpret_cast64<uint64_t>(klass); + } else { + // Note: If the class is not in the dex cache or isn't initialized, the + // instruction needs environment and will not be inlined across dex files. + // Within a dex file, the slow-path helper loads the correct class and + // inlined frames are used correctly for OOM stack trace. + // TODO: Write a test for this. Bug: 29416588 + desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress; + void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index]; + address = reinterpret_cast64<uint64_t>(dex_cache_element_address); + } + } else { + // AOT app compilation. Check if the class is in the boot image. 
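The sharpening logic above (the boot-image and JIT cases, with the AOT app case following just below) amounts to a small decision table over a handful of conditions. A condensed, illustrative restatement as a pure function; the parameter names and the flattening into booleans are mine, not ART API, and the codegen may still fall back via GetSupportedLoadClassKind:

#include <cassert>

enum class LoadKind {
  kBootImageLinkTimeAddress,
  kBootImageLinkTimePcRelative,
  kBootImageAddress,
  kDexCacheAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
};

// Condensed restatement of HSharpening::ProcessLoadClass's desired kind.
// is_boot_image_class stands for "resolved and included in the image";
// class_in_boot_image_space stands for "resolved and in the boot image heap".
LoadKind DesiredClassLoadKind(bool compiling_boot_image,
                              bool supports_boot_image_fixup,
                              bool is_boot_image_class,
                              bool compile_pic,
                              bool jit,
                              bool class_in_boot_image_space,
                              bool dex_file_in_oat_file) {
  if (compiling_boot_image) {
    if (!supports_boot_image_fixup) return LoadKind::kDexCacheViaMethod;
    if (is_boot_image_class) {
      return compile_pic ? LoadKind::kBootImageLinkTimePcRelative
                         : LoadKind::kBootImageLinkTimeAddress;
    }
    return LoadKind::kDexCachePcRelative;
  }
  if (jit) {
    return class_in_boot_image_space ? LoadKind::kBootImageAddress
                                     : LoadKind::kDexCacheAddress;
  }
  // AOT app compilation.
  if (class_in_boot_image_space && !compile_pic) return LoadKind::kBootImageAddress;
  return dex_file_in_oat_file ? LoadKind::kDexCachePcRelative
                              : LoadKind::kDexCacheViaMethod;
}

int main() {
  // Non-PIC boot image compile of a boot image class.
  assert(DesiredClassLoadKind(true, true, true, false, false, true, true) ==
         LoadKind::kBootImageLinkTimeAddress);
  // JIT, class already in the boot image heap space.
  assert(DesiredClassLoadKind(false, false, false, false, true, true, true) ==
         LoadKind::kBootImageAddress);
  // PIC AOT app compile, dex file belongs to the oat file being compiled.
  assert(DesiredClassLoadKind(false, false, false, true, false, true, true) ==
         LoadKind::kDexCachePcRelative);
  return 0;
}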
+ if ((klass != nullptr) && + runtime->GetHeap()->ObjectIsInBootImageSpace(klass) && + !codegen_->GetCompilerOptions().GetCompilePic()) { + desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + address = reinterpret_cast64<uint64_t>(klass); + } else { + // Not JIT and either the klass is not in boot image or we are compiling in PIC mode. + // Use PC-relative load from the dex cache if the dex file belongs + // to the oat file that we're currently compiling. + desired_load_kind = + ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile()) + ? HLoadClass::LoadKind::kDexCachePcRelative + : HLoadClass::LoadKind::kDexCacheViaMethod; + } + } + } + if (is_in_dex_cache) { + load_class->MarkInDexCache(); + } + + HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind); + switch (load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kDexCacheViaMethod: + load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index); + break; + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK_NE(address, 0u); + load_class->SetLoadKindWithAddress(load_kind, address); + break; + case HLoadClass::LoadKind::kDexCachePcRelative: { + size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); + DexCacheArraysLayout layout(pointer_size, &dex_file); + size_t element_index = layout.TypeOffset(type_index); + load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index); + break; + } + default: + LOG(FATAL) << "Unexpected load kind: " << load_kind; + UNREACHABLE(); + } +} + void HSharpening::ProcessLoadString(HLoadString* load_string) { DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); DCHECK(!load_string->IsInDexCache()); @@ -193,13 +311,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { mirror::String* string = dex_cache->GetResolvedString(string_index); is_in_dex_cache = (string != nullptr); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); } else { // Note: If the string is not in the dex cache, the instruction needs environment // and will not be inlined across dex files. Within a dex file, the slow-path helper // loads the correct string and inlined frames are used correctly for OOM stack trace. - // TODO: Write a test for this. + // TODO: Write a test for this. Bug: 29416588 desired_load_kind = HLoadString::LoadKind::kDexCacheAddress; void* dex_cache_element_address = &dex_cache->GetStrings()[string_index]; address = reinterpret_cast64<uint64_t>(dex_cache_element_address); @@ -207,20 +326,18 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } else { // AOT app compilation. Try to lookup the string without allocating if not found. mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache); - if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - if (codegen_->GetCompilerOptions().GetCompilePic()) { - // Use PC-relative load from the dex cache if the dex file belongs - // to the oat file that we're currently compiling. 
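For the kDexCachePcRelative case above, the code records a byte offset into the dex cache arrays (DexCacheArraysLayout::TypeOffset) rather than a raw type index. A toy illustration of turning an element index into such an offset; the layout constants and element size here are made up for the sketch, not the real DexCacheArraysLayout:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Toy layout: resolved types start at a fixed offset inside the dex cache
// arrays and each element has a fixed size. The real DexCacheArraysLayout
// derives these from the dex file and the target's pointer size.
struct ToyDexCacheArraysLayout {
  size_t element_size;
  size_t types_begin;  // byte offset of the resolved-types array

  size_t TypeOffset(uint32_t type_index) const {
    return types_begin + type_index * element_size;
  }
};

int main() {
  ToyDexCacheArraysLayout layout{/* element_size */ 8u, /* types_begin */ 0x100u};
  // The PC-relative load then uses this byte offset from the arrays base.
  assert(layout.TypeOffset(0u) == 0x100u);
  assert(layout.TypeOffset(5u) == 0x128u);
  return 0;
}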
- desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
- ? HLoadString::LoadKind::kDexCachePcRelative
- : HLoadString::LoadKind::kDexCacheViaMethod;
- } else {
- desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
- address = reinterpret_cast64<uint64_t>(string);
- }
+ if (string != nullptr &&
+ runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
+ !codegen_->GetCompilerOptions().GetCompilePic()) {
+ desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+ address = reinterpret_cast64<uint64_t>(string);
 } else {
- // Not JIT and the string is not in boot image.
- desired_load_kind = HLoadString::LoadKind::kDexCachePcRelative;
+ // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
+ // Use PC-relative load from the dex cache if the dex file belongs
+ // to the oat file that we're currently compiling.
+ desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
+ ? HLoadString::LoadKind::kDexCachePcRelative
+ : HLoadString::LoadKind::kDexCacheViaMethod;
 }
 }
 }
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 24152f6b71..d35ae66e05 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,7 @@ class HSharpening : public HOptimization {
 private:
 void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+ void ProcessLoadClass(HLoadClass* load_class);
 void ProcessLoadString(HLoadString* load_string);
 CodeGenerator* codegen_;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 11a254ef63..fc8af6462a 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -228,7 +228,7 @@ size_t StackMapStream::ComputeDexRegisterMapsSize() const {
 void StackMapStream::ComputeInlineInfoEncoding() {
 uint32_t method_index_max = 0;
- uint32_t dex_pc_max = 0;
+ uint32_t dex_pc_max = DexFile::kDexNoIndex;
 uint32_t invoke_type_max = 0;
 uint32_t inline_info_index = 0;
@@ -236,7 +236,10 @@ void StackMapStream::ComputeInlineInfoEncoding() {
 for (size_t j = 0; j < entry.inlining_depth; ++j) {
 InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
 method_index_max = std::max(method_index_max, inline_entry.method_index);
- dex_pc_max = std::max(dex_pc_max, inline_entry.dex_pc);
+ if (inline_entry.dex_pc != DexFile::kDexNoIndex &&
+ (dex_pc_max == DexFile::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) {
+ dex_pc_max = inline_entry.dex_pc;
+ }
 invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
 }
 }
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 41f72f508b..53a9795d52 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -108,7 +108,7 @@ class StackMapStream : public ValueObject {
 };
 struct InlineInfoEntry {
- uint32_t dex_pc;
+ uint32_t dex_pc; // DexFile::kDexNoIndex for intrinsified native methods.
 uint32_t method_index;
 InvokeType invoke_type;
 uint32_t num_dex_registers;
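The stack_map_stream.cc change above makes dex_pc_max ignore entries whose dex pc is the DexFile::kDexNoIndex sentinel (used for intrinsified native methods). A self-contained sketch of that "maximum, skipping the sentinel" computation, with a stand-in constant:

#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint32_t kNoDexPc = 0xFFFFFFFFu;  // stand-in for DexFile::kDexNoIndex

// Returns the largest real dex pc, or kNoDexPc if every entry is the sentinel.
uint32_t MaxDexPc(const std::vector<uint32_t>& dex_pcs) {
  uint32_t dex_pc_max = kNoDexPc;
  for (uint32_t dex_pc : dex_pcs) {
    if (dex_pc != kNoDexPc && (dex_pc_max == kNoDexPc || dex_pc_max < dex_pc)) {
      dex_pc_max = dex_pc;
    }
  }
  return dex_pc_max;
}

int main() {
  assert(MaxDexPc({3u, kNoDexPc, 17u}) == 17u);
  assert(MaxDexPc({kNoDexPc, kNoDexPc}) == kNoDexPc);  // only intrinsified frames
  assert(MaxDexPc({}) == kNoDexPc);
  return 0;
}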