diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator.h | 3 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 587 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips.h | 48 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips64.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/dex_cache_array_fixups_mips.cc | 44 | ||||
-rw-r--r-- | compiler/optimizing/dex_cache_array_fixups_mips.h | 11 | ||||
-rw-r--r-- | compiler/optimizing/instruction_builder.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/instruction_builder.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 19 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 41 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/pc_relative_fixups_mips.cc | 37 |
16 files changed, 722 insertions, 78 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2042adef1c..62dd1cc818 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -359,7 +359,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`. template <size_t pointer_size> static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) { - DCHECK_LT(reg, 32u); + // The entry point list defines 30 ReadBarrierMarkRegX entry points. + DCHECK_LT(reg, 30u); // The ReadBarrierMarkRegX entry points are ordered by increasing // register number in Thread::tls_Ptr_.quick_entrypoints. return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value() diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 05cb8d1940..a07a2331fc 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,7 +18,6 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "string_reference.h" diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 88e8cead32..03f5a3364c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -20,7 +20,6 @@ #include "arch/arm64/quick_method_frame_info_arm64.h" #include "code_generator.h" #include "common_arm64.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 39248aa430..334d30d90e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -482,11 +482,22 @@ 
CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena(), &isa_features), isa_features_(isa_features), + uint32_literals_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_address_patches_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + clobbered_ra_(false) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -688,6 +699,16 @@ void CodeGeneratorMIPS::ComputeSpillMask() { if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) { core_spill_mask_ |= (1 << ZERO); } + // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register + // (this can happen in leaf methods), artificially spill the ZERO register in order to + // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only + // spilled register. + // TODO: Can this be improved? It causes creation of a stack frame (while RA might be + // saved in an unused temporary register) and saving of RA and the current method pointer + // in the frame. 
+ if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) { + core_spill_mask_ |= (1 << ZERO); + } } static dwarf::Reg DWARFReg(Register reg) { @@ -962,7 +983,12 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch size_t size = method_patches_.size() + call_patches_.size() + - pc_relative_dex_cache_patches_.size(); + pc_relative_dex_cache_patches_.size() + + pc_relative_string_patches_.size() + + pc_relative_type_patches_.size() + + boot_image_string_patches_.size() + + boot_image_type_patches_.size() + + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -994,6 +1020,71 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch pc_rel_offset, base_element_offset)); } + for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t string_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative literals. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? __ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset, + &dex_file, + pc_rel_offset, + string_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t type_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative literals. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? 
__ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset, + &dex_file, + pc_rel_offset, + type_index)); + } + for (const auto& entry : boot_image_string_patches_) { + const StringReference& target_string = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, + target_string.dex_file, + target_string.string_index)); + } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index)); + } + for (const auto& entry : boot_image_address_patches_) { + DCHECK(GetCompilerOptions().GetIncludePatchInformation()); + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); + } +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( + const DexFile& dex_file, uint32_t string_index) { + return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( + const DexFile& dex_file, uint32_t type_index) { + return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( @@ -1007,6 +1098,12 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( return &patches->back(); } +Literal* 
CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { + return map->GetOrCreate( + value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map) { return map->GetOrCreate( @@ -1022,6 +1119,26 @@ Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_ return DeduplicateMethodLiteral(target_method, &call_patches_); } +Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file, + uint32_t string_index) { + return boot_image_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + uint32_t type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) { + bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); + Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_; + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -1067,6 +1184,15 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const { blocked_fpu_registers_[i] = true; } + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. 
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } + UpdateBlockedPairRegisters(); } @@ -3440,7 +3566,8 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (field_type == Primitive::kPrimLong) { locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong)); } else { - locations->SetOut(Location::RequiresFpuRegister()); + // Use Location::Any() to prevent situations when running out of available fp registers. + locations->SetOut(Location::Any()); // Need some temp core regs since FP results are returned in core registers Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong); locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>())); @@ -3505,11 +3632,23 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, IsDirectEntrypoint(kQuickA64Load)); CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>(); if (type == Primitive::kPrimDouble) { - // Need to move to FP regs since FP results are returned in core registers. - __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); - __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); + // FP results are returned in core registers. Need to move them. 
+ Location out = locations->Out(); + if (out.IsFpuRegister()) { + __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + out.AsFpuRegister<FRegister>()); + } else { + DCHECK(out.IsDoubleStackSlot()); + __ StoreToOffset(kStoreWord, + locations->GetTemp(1).AsRegister<Register>(), + SP, + out.GetStackIndex()); + __ StoreToOffset(kStoreWord, + locations->GetTemp(2).AsRegister<Register>(), + SP, + out.GetStackIndex() + 4); + } } } else { if (!Primitive::IsFloatingPointType(type)) { @@ -3568,7 +3707,8 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); } else { - locations->SetInAt(1, Location::RequiresFpuRegister()); + // Use Location::Any() to prevent situations when running out of available fp registers. + locations->SetInAt(1, Location::Any()); // Pass FP parameters in core registers. locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3))); @@ -3627,10 +3767,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); if (type == Primitive::kPrimDouble) { // Pass FP parameters in core registers. 
- __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); - __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + Location in = locations->InAt(1); + if (in.IsFpuRegister()) { + __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + in.AsFpuRegister<FRegister>()); + } else if (in.IsDoubleStackSlot()) { + __ LoadFromOffset(kLoadWord, + locations->GetTemp(1).AsRegister<Register>(), + SP, + in.GetStackIndex()); + __ LoadFromOffset(kLoadWord, + locations->GetTemp(2).AsRegister<Register>(), + SP, + in.GetStackIndex() + 4); + } else { + DCHECK(in.IsConstant()); + DCHECK(in.GetConstant()->IsDoubleConstant()); + int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue()); + __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + value); + } } codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), instruction, @@ -3696,6 +3854,23 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); } +void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. 
+ } +} + void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath; @@ -3861,16 +4036,80 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen } HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO: Implement other kinds. - return HLoadString::LoadKind::kDexCacheViaMethod; + HLoadString::LoadKind desired_string_load_kind) { + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + // We disable PC-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = has_irreducible_loops; + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageAddress: + break; + case HLoadString::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + fallback_load = false; + break; + case HLoadString::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. 
+ break; + case HLoadString::LoadKind::kDexCacheViaMethod: + fallback_load = false; + break; + } + if (fallback_load) { + desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + } + return desired_string_load_kind; } HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); - // TODO: Implement other kinds. - return HLoadClass::LoadKind::kDexCacheViaMethod; + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = has_irreducible_loops; + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + fallback_load = false; + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + fallback_load = false; + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. 
+ break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + fallback_load = false; + break; + } + if (fallback_load) { + desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } + return desired_class_load_kind; } Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, @@ -4107,11 +4346,40 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(V0)); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(V0), + /* code_generator_supports_read_barrier */ false); // TODO: revisit this bool. + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + if (codegen_->GetInstructionSetFeatures().IsR6()) { + break; + } + FALLTHROUGH_INTENDED; + // We need an extra register for PC-relative dex cache accesses. 
+ case HLoadClass::LoadKind::kDexCachePcRelative: + case HLoadClass::LoadKind::kReferrersClass: + case HLoadClass::LoadKind::kDexCacheViaMethod: + locations->SetInAt(0, Location::RequiresRegister()); + break; + default: + break; + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { @@ -4127,34 +4395,126 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { return; } - Register out = locations->Out().AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset(kLoadWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - __ LoadFromOffset(kLoadWord, out, current_method, - ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ Beqz(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + Register base_or_current_method_reg; + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. 
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + break; + // We need an extra register for PC-relative dex cache accesses. + case HLoadClass::LoadKind::kDexCachePcRelative: + case HLoadClass::LoadKind::kReferrersClass: + case HLoadClass::LoadKind::kDexCacheViaMethod: + base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); + break; + default: + base_or_current_method_reg = ZERO; + break; + } + + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(cls, + out_loc, + base_or_current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + if (isR6) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); } else { - __ Bind(slow_path->GetExitLabel()); + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // We do not bind info->pc_rel_label here, we'll use the assembler's label + // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. 
+ __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, base_or_current_method_reg); } + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(cls->GetAddress(), 4u); + int16_t offset = Low16Bits(address); + uint32_t base_address = address - offset; // This accounts for offset sign extension. + __ Lui(out, High16Bits(base_address)); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase(); + int32_t offset = + cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ LoadFromOffset(kLoadWord, + out, + base_or_current_method_reg, + ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + GenerateGcRootFieldLoad(cls, 
out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -4183,21 +4543,132 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT } void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() + LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); + HLoadString::LoadKind load_kind = load->GetLoadKind(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + if (codegen_->GetInstructionSetFeatures().IsR6()) { + break; + } + FALLTHROUGH_INTENDED; + // We need an extra register for PC-relative dex cache accesses. 
+ case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kDexCacheViaMethod: + locations->SetInAt(0, Location::RequiresRegister()); + break; + default: + break; + } locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { + HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + Register base_or_current_method_reg; + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + break; + // We need an extra register for PC-relative dex cache accesses. + case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kDexCacheViaMethod: + base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); + break; + default: + base_or_current_method_reg = ZERO; + break; + } + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), + load->GetStringIndex())); + return; // No dex cache slow path. 
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + if (isR6) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); + } else { + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // We do not bind info->pc_rel_label here, we'll use the assembler's label + // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, base_or_current_method_reg); + } + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(load->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kDexCacheAddress: { + DCHECK_NE(load->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(load->GetAddress(), 4u); + int16_t offset = Low16Bits(address); + uint32_t base_address = address - offset; // This accounts for offset sign extension. 
+ __ Lui(out, High16Bits(base_address)); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(load, out_loc, out, offset); + break; + } + case HLoadString::LoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase(); + int32_t offset = + load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset); + break; + } + case HLoadString::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(load, + out_loc, + base_or_current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad(load, + out_loc, + out, + CodeGenerator::GetCacheOffset(load->GetStringIndex())); + break; + } + default: + LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); + UNREACHABLE(); + } if (!load->IsInDexCache()) { SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); @@ -5327,6 +5798,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( __ Nal(); // Grab the return address off RA. __ Move(reg, RA); + // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()? // Remember this offset (the obtained PC value) for later use with constant area. __ BindPcRelBaseLabel(); @@ -5357,6 +5829,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra __ Ori(reg, reg, /* placeholder */ 0x5678); // Add a 32-bit offset to PC. 
__ Addu(reg, reg, RA); + // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? } } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 08f74c04d1..63a0345c1c 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -18,11 +18,12 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" +#include "string_reference.h" #include "utils/mips/assembler_mips.h" +#include "utils/type_reference.h" namespace art { namespace mips { @@ -226,6 +227,15 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). 
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, @@ -298,6 +308,9 @@ class CodeGeneratorMIPS : public CodeGenerator { size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); + void ClobberRA() { + clobbered_ra_ = true; + } void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; @@ -383,7 +396,7 @@ class CodeGeneratorMIPS : public CodeGenerator { PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; const DexFile& target_dex_file; - // Either the dex cache array element offset or the string index. + // Either the dex cache array element offset or the string/type index. 
uint32_t offset_or_index; // Label for the instruction loading the most significant half of the offset that's added to PC // to form the base address (the least significant half is loaded with the instruction that @@ -393,14 +406,27 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsLabel pc_rel_label; }; + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index); + Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); + Literal* DeduplicateBootImageAddressLiteral(uint32_t address); private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; - + using BootStringToLiteralMap = ArenaSafeMap<StringReference, + Literal*, + StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + Literal*, + TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); @@ -417,11 +443,27 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler assembler_; const MipsInstructionSetFeatures& isa_features_; + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. 
+ Uint32ToLiteralMap uint32_literals_; // Method patch info, map MethodReference to a literal for method address and method code. MethodToLiteralMap method_patches_; MethodToLiteralMap call_patches_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + // Deduplication map for boot string literals for kBootImageLinkTimeAddress. + BootStringToLiteralMap boot_image_string_patches_; + // PC-relative String patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + // Deduplication map for patchable boot image addresses. + Uint32ToLiteralMap boot_image_address_patches_; + + // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods. + // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations. 
+ bool clobbered_ra_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); }; diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 4b462cc800..197f86b22b 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -18,7 +18,6 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 12901724e7..39ea7d53a6 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,7 +19,6 @@ #include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index cf92d68c64..fbb78bc5f7 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -19,7 +19,6 @@ #include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc index 0f42d9ce0f..19bab08eb4 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "code_generator_mips.h" #include "dex_cache_array_fixups_mips.h" #include "base/arena_containers.h" @@ -27,8 +28,9 @@ namespace mips { */ class DexCacheArrayFixupsVisitor : public HGraphVisitor { public: - explicit DexCacheArrayFixupsVisitor(HGraph* graph) + explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), dex_cache_array_bases_(std::less<const DexFile*>(), // Attribute memory use to code generator. graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} @@ -41,9 +43,45 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { HMipsDexCacheArraysBase* base = entry.second; base->MoveBeforeFirstUserAndOutOfLoops(); } + // Computing the dex cache base for PC-relative accesses will clobber RA with + // the NAL instruction on R2. Take a note of this before generating the method + // entry. + if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) { + codegen_->ClobberRA(); + } } private: + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + // If this is a load with PC-relative access to the dex cache types array, + // we need to add the dex cache arrays base as the special input. + if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) { + // Initialize base for target dex file if needed. + const DexFile& dex_file = load_class->GetDexFile(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); + base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex())); + // Add the special argument base to the load. + load_class->AddSpecialInput(base); + } + } + + void VisitLoadString(HLoadString* load_string) OVERRIDE { + // If this is a load with PC-relative access to the dex cache strings array, + // we need to add the dex cache arrays base as the special input. 
+ if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { + // Initialize base for target dex file if needed. + const DexFile& dex_file = load_string->GetDexFile(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); + base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); + // Add the special argument base to the load. + load_string->AddSpecialInput(base); + } + } + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. @@ -74,6 +112,8 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { }); } + CodeGeneratorMIPS* codegen_; + using DexCacheArraysBaseMap = ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; DexCacheArraysBaseMap dex_cache_array_bases_; @@ -85,7 +125,7 @@ void DexCacheArrayFixups::Run() { // that can be live-in at the irreducible loop header. 
return; } - DexCacheArrayFixupsVisitor visitor(graph_); + DexCacheArrayFixupsVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBasesIfNeeded(); } diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h index c8def2842e..21056e130a 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.h +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -21,14 +21,21 @@ #include "optimization.h" namespace art { + +class CodeGenerator; + namespace mips { class DexCacheArrayFixups : public HOptimization { public: - DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {} + DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_mips", stats), + codegen_(codegen) {} void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; }; } // namespace mips diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index afac5f9cf1..e5dab569fd 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -19,6 +19,7 @@ #include "art_method-inl.h" #include "bytecode_utils.h" #include "class_linker.h" +#include "dex_instruction-inl.h" #include "driver/compiler_options.h" #include "scoped_thread_state_change.h" diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 9cfc065da6..517cf76831 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -30,6 +30,8 @@ namespace art { +class Instruction; + class HInstructionBuilder : public ValueObject { public: HInstructionBuilder(HGraph* graph, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d557f42968..2808e1b5fc 100644 --- a/compiler/optimizing/nodes.cc +++ 
b/compiler/optimizing/nodes.cc @@ -2632,4 +2632,23 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { } } +std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) { + switch (kind) { + case MemBarrierKind::kAnyStore: + return os << "AnyStore"; + case MemBarrierKind::kLoadAny: + return os << "LoadAny"; + case MemBarrierKind::kStoreStore: + return os << "StoreStore"; + case MemBarrierKind::kAnyAny: + return os << "AnyAny"; + case MemBarrierKind::kNTStoreStore: + return os << "NTStoreStore"; + + default: + LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind); + UNREACHABLE(); + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 23ac457568..dfa8276651 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -25,7 +25,6 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/stl_util.h" -#include "dex/compiler_enums.h" #include "dex_file.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" @@ -5626,9 +5625,12 @@ inline uint32_t HLoadClass::GetDexCacheElementOffset() const { // Note: defined outside class to see operator<<(., HLoadClass::LoadKind). inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { - // The special input is used for PC-relative loads on some architectures. + // The special input is used for PC-relative loads on some architectures, + // including literal pool loads, which are PC-relative too. 
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind(); + GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || + GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); special_input->AddUseAt(this, 0); @@ -5836,9 +5838,12 @@ inline uint32_t HLoadString::GetDexCacheElementOffset() const { // Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline void HLoadString::AddSpecialInput(HInstruction* special_input) { - // The special input is used for PC-relative loads on some architectures. + // The special input is used for PC-relative loads on some architectures, + // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind(); + GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || + GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6305,6 +6310,32 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { DISALLOW_COPY_AND_ASSIGN(HCheckCast); }; +/** + * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers"). + * @details We define the combined barrier types that are actually required + * by the Java Memory Model, rather than using exactly the terminology from + * the JSR-133 cookbook. These should, in many cases, be replaced by acquire/release + * primitives. 
Note that the JSR-133 cookbook generally does not deal with + * store atomicity issues, and the recipes there are not always entirely sufficient. + * The current recipe is as follows: + * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. + * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.) + * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. + * -# Use StoreStore barrier after all stores but before return from any constructor whose + * class has final fields. + * -# Use NTStoreStore to order non-temporal stores with respect to all later + * store-to-memory instructions. Only generated together with non-temporal stores. + */ +enum MemBarrierKind { + kAnyStore, + kLoadAny, + kStoreStore, + kAnyAny, + kNTStoreStore, + kLastBarrierKind = kNTStoreStore +}; +std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); + class HMemoryBarrier FINAL : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index aedfcb42aa..d5b0d77fe5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -504,7 +504,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, mips::PcRelativeFixups* pc_relative_fixups = new (arena) mips::PcRelativeFixups(graph, codegen, stats); mips::DexCacheArrayFixups* dex_cache_array_fixups = - new (arena) mips::DexCacheArrayFixups(graph, stats); + new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); HOptimization* mips_optimizations[] = { pc_relative_fixups, dex_cache_array_fixups diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index ba405cdb69..c6acc45581 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ 
b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -37,6 +37,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // entry block) and relieve some pressure on the register allocator // while avoiding recalculation of the base in a loop. base_->MoveBeforeFirstUserAndOutOfLoops(); + // Computing the base for PC-relative literals will clobber RA with + // the NAL instruction on R2. Take a note of this before generating + // the method entry. + codegen_->ClobberRA(); } } @@ -58,6 +62,36 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); + switch (load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + // Add a base register for PC-relative literals on R2. + InitializePCRelativeBasePointer(); + load_class->AddSpecialInput(base_); + break; + default: + break; + } + } + + void VisitLoadString(HLoadString* load_string) OVERRIDE { + HLoadString::LoadKind load_kind = load_string->GetLoadKind(); + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + // Add a base register for PC-relative literals on R2. + InitializePCRelativeBasePointer(); + load_string->AddSpecialInput(base_); + break; + default: + break; + } + } + void HandleInvoke(HInvoke* invoke) { // If this is an invoke-static/-direct with PC-relative dex cache array // addressing, we need the PC-relative address base. @@ -77,7 +111,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // method pointer from the invoke. if (invoke_static_or_direct->HasCurrentMethodInput()) { DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); - CHECK(!has_extra_input); // TODO: review this. 
+ CHECK(!has_extra_input); return; } @@ -116,7 +150,6 @@ void PcRelativeFixups::Run() { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); if (mips_codegen->GetInstructionSetFeatures().IsR6()) { // Do nothing for R6 because it has PC-relative addressing. - // TODO: review. Move this check into RunArchOptimizations()? return; } if (graph_->HasIrreducibleLoops()) { |