Diffstat (limited to 'compiler/optimizing')
67 files changed, 4026 insertions, 4459 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 01155dcd37..ff59173c8b 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -43,6 +43,7 @@ #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" #include "base/casts.h" +#include "base/leb128.h" #include "bytecode_utils.h" #include "class_linker.h" #include "compiled_method.h" @@ -52,7 +53,6 @@ #include "graph_visualizer.h" #include "intern_table.h" #include "intrinsics.h" -#include "leb128.h" #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object_reference.h" diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2dafbf7f6d..60de722285 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -438,8 +438,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; - case TypeCheckKind::kBitstringCheck: - return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); @@ -620,14 +618,18 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { protected: // Patch info used for recording locations of required linker patches and their targets, - // i.e. target method, string, type or code identified by their dex file and index. + // i.e. target method, string, type or code identified by their dex file and index, + // or .data.bimg.rel.ro entries identified by the boot image offset. template <typename LabelType> struct PatchInfo { - PatchInfo(const DexFile& target_dex_file, uint32_t target_index) - : dex_file(target_dex_file), index(target_index) { } - - const DexFile& dex_file; - uint32_t index; + PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } + + // Target dex file or null for .data.bmig.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index. + uint32_t offset_or_index; + // Label for the instruction to patch. 
LabelType label; }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b47a5cf3c4..bc687e845b 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -78,6 +78,7 @@ using helpers::OutputFPRegister; using helpers::OutputRegister; using helpers::QRegisterFrom; using helpers::RegisterFrom; +using helpers::SRegisterFrom; using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; @@ -1395,11 +1396,11 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), @@ -1488,6 +1489,14 @@ void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); if (do_overflow_check) { @@ -1881,6 +1890,8 @@ void CodeGeneratorARM64::Load(DataType::Type type, DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); __ Ldr(dst, src); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -1959,6 +1970,8 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, __ Fmov(FPRegister(dst), temp); break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -1986,6 +1999,8 @@ void CodeGeneratorARM64::Store(DataType::Type type, DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); __ Str(src, dst); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -2063,6 +2078,8 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -2112,26 +2129,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } -void 
InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( - HTypeCheckInstruction* check, vixl::aarch64::Register temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); - } else { - // /* uint32_t */ temp = temp->status_ - __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); - // Extract the bitstring bits. - __ Ubfx(temp, temp, 0, mask_bits); - } - // Compare the bitstring bits to `path_to_root`. - __ Cmp(temp, path_to_root); -} - void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -3521,6 +3518,15 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp1 = temps.AcquireX(); + Register temp2 = temps.AcquireX(); + __ Ldr(temp1, MemOperand(sp, 0)); + __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp2, temp2, 1); + __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3860,8 +3866,6 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -3870,13 +3874,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3889,9 +3887,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) - ? 
Register() - : InputRegisterAt(instruction, 1); + Register cls = InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4077,23 +4073,6 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ Cset(out, eq); - if (zero.IsLinked()) { - __ B(&done); - } - break; - } } if (zero.IsLinked()) { @@ -4116,13 +4095,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4132,9 +4105,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) - ? Register() - : InputRegisterAt(instruction, 1); + Register cls = InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4315,20 +4286,6 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ B(ne, type_check_slow_path->GetEntryLabel()); - break; - } } __ Bind(&done); @@ -4491,11 +4448,11 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); // Add ADRP with its PC-relative method patch. - vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add ADD with its PC-relative method patch. vixl::aarch64::Label* add_label = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } @@ -4503,12 +4460,23 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( // Load method address from literal pool. 
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = invoke->GetDispatchInfo().method_load_data; + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - // Add ADRP with its PC-relative DexCache access patch. + // Add ADRP with its PC-relative .bss entry patch. MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); - // Add LDR with its PC-relative DexCache access patch. + // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); @@ -4603,51 +4571,54 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - adrp_label, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - adrp_label, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( +vixl::aarch64::Label* 
CodeGeneratorARM64::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { @@ -4656,7 +4627,7 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t cust } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -4723,36 +4694,45 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { linker_patches->push_back(Factory(info.label.GetLocation(), - &info.target_dex_file, + info.target_dex_file, info.pc_insn_label->GetLocation(), info.offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4920,11 +4900,11 @@ void 
InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative type patch. const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } @@ -4941,11 +4921,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative type patch. const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative type patch. vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); // Extract the reference from the slot data, i.e. clear the hash bits. int32_t masked_hash = ClassTable::TableSlot::MaskHash( @@ -5088,11 +5068,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; } @@ -5108,11 +5088,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative String patch. vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5510,6 +5490,153 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +// TODO: integrate with HandleBinaryOp? 
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + Register op1_reg; + Register op2_reg; + Register out_reg; + if (type == DataType::Type::kInt64) { + op1_reg = XRegisterFrom(op1); + op2_reg = XRegisterFrom(op2); + out_reg = XRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + op1_reg = WRegisterFrom(op1); + op2_reg = WRegisterFrom(op2); + out_reg = WRegisterFrom(out); + } + + __ Cmp(op1_reg, op2_reg); + __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + FPRegister op1_reg; + FPRegister op2_reg; + FPRegister out_reg; + if (type == DataType::Type::kFloat64) { + op1_reg = DRegisterFrom(op1); + op2_reg = DRegisterFrom(op2); + out_reg = DRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + op1_reg = SRegisterFrom(op1); + op2_reg = SRegisterFrom(op2); + out_reg = SRegisterFrom(out); + } + + if (is_min) { + __ Fmin(out_reg, op1_reg, op2_reg); + } else { + __ Fmax(out_reg, op1_reg, op2_reg); + } +} + +// TODO: integrate with HandleBinaryOp? 
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARM64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index cc369de983..cb61b69609 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -264,8 +264,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); @@ -275,6 +273,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one 
register `out`: // // out <- *(out + offset) @@ -563,12 +565,19 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative method patch for an instruction and return the label + // Add a new boot image relocation patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry method patch for an instruction and return // the label to be bound before the instruction. The instruction will be @@ -577,13 +586,13 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative type patch for an instruction and return the label + // Add a new boot image type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the @@ -593,13 +602,13 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative string patch for an instruction and return the label + // Add a new boot image string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the @@ -779,17 +788,12 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. 
The only difference is the interpretation of the - // offset_or_index. - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) - : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { } + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. - uint32_t offset_or_index; - vixl::aarch64::Label label; vixl::aarch64::Label* pc_insn_label; }; @@ -800,7 +804,7 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t custom_data; }; - vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file, + vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches); @@ -827,16 +831,16 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. 
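Editor's note: the CountHotnessInCompiledCode() sequences added above (in GenerateFrameEntry and in HandleGoto, and again in the ARM code further down) all implement the same idea: bump the 16-bit hotness counter stored in the ArtMethod on method entry and on every loop back edge. Below is a minimal C++ sketch of what the emitted Ldrh/Add/Strh triple does; the Method struct and field name are illustrative stand-ins, not the real ArtMethod layout.

#include <cstdint>

// Hypothetical stand-in for the method metadata the generated code touches.
struct Method {
  uint16_t hotness_count_;  // 16-bit counter, hence the half-word Ldrh/Strh accesses.
};

// Semantics of the generated prologue/back-edge code: load the half-word
// counter, add one, store it back. The counter simply wraps on overflow;
// the runtime samples it to decide when a method is hot.
inline void CountHotness(Method* method) {
  uint16_t hotness = method->hotness_count_;  // Ldrh temp, [method, #HotnessCountOffset]
  hotness += 1u;                              // Add  temp, temp, 1
  method->hotness_count_ = hotness;           // Strh temp, [method, #HotnessCountOffset]
}

On back edges the ARM64 code first reloads the ArtMethod pointer from the bottom of the frame (Ldr temp1, [sp, 0]) and the ARM code spills and reloads kMethodRegister, since the method register cannot be assumed to still hold the ArtMethod pointer at that point.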
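Editor's note: the renamed boot_image_method_patches_ container now does double duty. When compiling the boot image its entries are (dex file, method index) pairs emitted as RelativeMethodPatch records; otherwise NewBootImageRelRoPatch records (nullptr, boot_image_offset) entries that EmitLinkerPatches routes through DataBimgRelRoPatchAdapter. For reference, the load sequence behind the new kBootImageRelRo dispatch, condensed from GenerateStaticOrDirectCall above with comments added (a sketch, not the full case):

// ARM64 kBootImageRelRo: materialize the address of the .data.bimg.rel.ro slot
// with ADRP+LDR and load the 32-bit boot image address stored there.
uint32_t boot_image_offset = invoke->GetDispatchInfo().method_load_data;
vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));  // ADRP xN, <page of slot>
vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));  // LDR wN, [xN, <page offset>]
// The entry is a 32-bit load because the boot image is mapped in the low 4GiB.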
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 504c6479cc..94438df898 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2354,11 +2354,11 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), @@ -2485,17 +2485,36 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + if (HasEmptyFrame()) { return; } if (!skip_overflow_check) { - // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here. - for (vixl32::Register reg : kParameterCoreRegistersVIXL) { - DCHECK(!reg.Is(r4)); + // Using r4 instead of IP saves 2 bytes. + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp; + // TODO: Remove this check when R4 is made a callee-save register + // in ART compiled code (b/72801708). Currently we need to make + // sure r4 is not blocked, e.g. in special purpose + // TestCodeGeneratorARMVIXL; also asserting that r4 is available + // here. + if (!blocked_core_registers_[R4]) { + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + temp = r4; + } else { + temp = temps.Acquire(); } - DCHECK(!kCoreCalleeSaves.Includes(r4)); - vixl32::Register temp = r4; __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. 
ExactAssemblyScope aas(GetVIXLAssembler(), @@ -2646,6 +2665,8 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -2661,6 +2682,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: { return LocationFrom(r0); } @@ -2669,6 +2691,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T return LocationFrom(s0); } + case DataType::Type::kUint64: case DataType::Type::kInt64: { return LocationFrom(r0, r1); } @@ -2790,6 +2813,16 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Push(vixl32::Register(kMethodRegister)); + GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Pop(vixl32::Register(kMethodRegister)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -4657,6 +4690,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? 
ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
+ __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. + __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(minmax, is_min); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(minmax, is_min); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch 
(abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -5498,6 +5824,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -5742,6 +6070,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); @@ -6234,6 +6564,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -6523,6 +6855,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); @@ -7090,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); break; } @@ -7105,7 +7439,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); // Extract the reference from the slot data, i.e. clear the hash bits. 
@@ -7195,67 +7529,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( - HTypeCheckInstruction* check, - vixl32::Register temp, - vixl32::FlagsUpdate flags_update) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs - // the Z flag for BNE. This is indicated by the `flags_update` parameter. - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); - // Check if the bitstring bits are equal to `path_to_root`. - if (flags_update == SetFlags) { - __ Cmp(temp, path_to_root); - } else { - __ Sub(temp, temp, path_to_root); - } - } else { - // /* uint32_t */ temp = temp->status_ - __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); - if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { - // Compare the bitstring bits using SUB. - __ Sub(temp, temp, path_to_root); - // Shift out bits that do not contribute to the comparison. - __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); - } else if (IsUint<16>(path_to_root)) { - if (temp.IsLow()) { - // Note: Optimized for size but contains one more dependent instruction than necessary. - // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the - // macro assembler would use the high reg IP for the constant by default. - // Compare the bitstring bits using SUB. - __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 - __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 - // Shift out bits that do not contribute to the comparison. - __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); - } else { - // Extract the bitstring bits. - __ Ubfx(temp, temp, 0, mask_bits); - // Check if the bitstring bits are equal to `path_to_root`. - if (flags_update == SetFlags) { - __ Cmp(temp, path_to_root); - } else { - __ Sub(temp, temp, path_to_root); - } - } - } else { - // Shift out bits that do not contribute to the comparison. - __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); - // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. 
- if (flags_update == SetFlags) { - __ Cmp(temp, path_to_root << (32u - mask_bits)); - } else { - __ Sub(temp, temp, path_to_root << (32u - mask_bits)); - } - } - } -} - HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -7313,7 +7586,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); return; } @@ -7327,7 +7600,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); return; @@ -7447,8 +7720,6 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -7457,13 +7728,7 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7478,9 +7743,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) - ? 
vixl32::Register() - : InputRegisterAt(instruction, 1); + vixl32::Register cls = InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7720,26 +7983,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(slow_path->GetEntryLabel()); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out, DontCare); - // If `out` is a low reg and we would have another low reg temp, we could - // optimize this as RSBS+ADC, see GenerateConditionWithZero(). - // - // Also, in some cases when `out` is a low reg and we're loading a constant to IP - // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size - // would be the same and we would have fewer direct data dependencies. - codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR - break; - } } if (done.IsReferenced()) { @@ -7757,13 +8000,7 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7772,9 +8009,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) - ? 
vixl32::Register() - : InputRegisterAt(instruction, 1); + vixl32::Register cls = InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7959,20 +8194,6 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop, /* far_target */ false); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); - __ B(ne, type_check_slow_path->GetEntryLabel()); - break; - } } if (done.IsReferenced()) { __ Bind(&done); @@ -8965,7 +9186,7 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod()); vixl32::Register temp_reg = RegisterFrom(temp); EmitMovwMovtPlaceholder(labels, temp_reg); break; @@ -8973,6 +9194,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = invoke->GetDispatchInfo().method_load_data; + PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* labels = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -9070,42 +9299,47 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( + uint32_t boot_image_offset) { + return NewPcRelativePatch(/* dex_file */ nullptr, + boot_image_offset, + &boot_image_method_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &boot_image_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &method_bss_entry_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, 
type_index.index_, &boot_image_type_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, &type_bss_entry_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( - const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); return &patches->back(); } @@ -9147,45 +9381,54 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.add_pc_label.IsBound()); uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation()); // Add MOVW patch. DCHECK(info.movw_label.IsBound()); uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation()); - linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index)); // Add MOVT patch. DCHECK(info.movt_label.IsBound()); uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation()); - linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
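// Editorial note, not part of the patch: this adapter exists only so that
// .data.bimg.rel.ro entries can reuse the generic emission loop. The Factory
// template parameter of EmitPcRelativeLinkerPatches() expects the four-argument
// shape
//     linker::LinkerPatch (*)(size_t, const DexFile*, uint32_t, uint32_t)
// whereas DataBimgRelRoPatch() carries no dex file, so the adapter simply drops
// the (always-null) dex file argument and is plugged in as
//     EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
//         boot_image_method_patches_, linker_patches);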
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index bd815f45b3..054acbc390 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -322,9 +322,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - vixl::aarch32::Register temp, - vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -352,6 +349,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* minmax, bool is_min); + void GenerateMinMaxDouble(HInstruction* minmax, bool is_min); + void 
GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -555,32 +558,35 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. The only difference is the interpretation of the - // offset_or_index. The PC-relative address is loaded with three instructions, + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The PC-relative address is loaded with three instructions, // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we // currently emit these 3 instructions together, instruction scheduling could // split this sequence apart, so we keep separate labels for each of them. struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx) { } PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. + // Target dex file or null for .data.bimg.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index. uint32_t offset_or_index; vixl::aarch32::Label movw_label; vixl::aarch32::Label movt_label; vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); @@ -777,7 +783,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { }; VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> @@ -799,16 +805,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative.
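Editorial aside on the PcRelativePatchInfo comment above (an illustrative sketch of the emitted pattern and its link-time fix-up, not code from this patch; the register and `target` are placeholders):

    // movw reg, #placeholder_lo    <- movw_label
    // movt reg, #placeholder_hi    <- movt_label
    // add  reg, pc                 <- add_pc_label
    //
    // At link time the patcher computes
    //     diff = target_address - (add_pc_location + 4)   // ADD's effective PC on Thumb2
    // and writes its low/high 16 bits into the MOVW/MOVT encodings, so that
    // `reg` holds target_address once the ADD has executed.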
- ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 2ed0ab79a1..11c1163119 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -58,9 +58,11 @@ Location MipsReturnLocation(DataType::Type return_type) { case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: return Location::RegisterLocation(V0); + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterPairLocation(V0, V1); @@ -140,6 +142,8 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -391,7 +395,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -1013,11 +1017,11 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1276,6 +1280,12 @@ static dwarf::Reg DWARFReg(Register reg) { void CodeGeneratorMIPS::GenerateFrameEntry() { __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lhu(TMP, kMethodRegisterArgument, 
ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + } + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod(); @@ -1573,7 +1583,7 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); @@ -1583,33 +1593,42 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( uint32_t pc_rel_offset = info_high.pc_rel_label.IsBound() ? __ GetLabelLocation(&info_high.pc_rel_label) : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1620,54 +1639,58 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return 
NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1922,41 +1945,13 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ LoadConst32(AT, shifted_initialized_value); - __ Bltu(TMP, AT, slow_path->GetEntryLabel()); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnez(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
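Editorial note on the GenerateClassInitializationCheck hunk above (an illustrative before/after, not part of the patch): because only the status byte is loaded, shifted_initialized_value fits the 16-bit immediate of sltiu, so the unsigned compare-and-branch no longer needs to materialize the constant in AT:

    // before:  __ LoadConst32(AT, shifted_initialized_value);
    //          __ Bltu(TMP, AT, slow_path->GetEntryLabel());   // branch if status < initialized
    // after:   __ Sltiu(TMP, TMP, shifted_initialized_value);  // TMP = (status < initialized)
    //          __ Bnez(TMP, slow_path->GetEntryLabel());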
__ Sync(0); __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - Register temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ LoadFromOffset( - kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - if (IsUint<16>(path_to_root)) { - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - __ LoadConst32(TMP, path_to_root); - __ Xor(temp, temp, TMP); - } - // Shift out bits that do not contribute to the comparison. - __ Sll(temp, temp, 32 - mask_bits); - } -} - void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. } @@ -2845,6 +2840,8 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3160,6 +3157,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3299,38 +3298,12 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
- break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - case TypeCheckKind::kBitstringCheck: - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3339,7 +3312,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Location cls = locations->InAt(1); + Register cls = locations->InAt(1).AsRegister<Register>(); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3355,18 +3328,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); MipsLabel done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, is_type_check_slow_path_fatal); @@ -3389,7 +3351,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bne(temp, cls, slow_path->GetEntryLabel()); break; } @@ -3415,7 +3377,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls.AsRegister<Register>(), &loop); + __ Bne(temp, cls, &loop); break; } @@ -3430,7 +3392,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. 
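Editorial note on the VisitCheckCast locations hunk above (a sketch of the policy that the shared CodeGenerator helpers now encapsulate, reconstructed from the deleted lines; the actual implementations in code_generator.h may differ in signature and detail):

    // bool IsTypeCheckSlowPathFatal(HCheckCast* check) {   // sketch only
    //   if (kEmitCompilerReadBarrier) return false;  // may need the runtime for false negatives
    //   TypeCheckKind kind = check->GetTypeCheckKind();
    //   return (kind == TypeCheckKind::kExactCheck ||
    //           kind == TypeCheckKind::kAbstractClassCheck ||
    //           kind == TypeCheckKind::kClassHierarchyCheck ||
    //           kind == TypeCheckKind::kArrayObjectCheck) &&
    //          !check->CanThrowIntoCatchBlock();
    // }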
MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls.AsRegister<Register>(), &done); + __ Beq(temp, cls, &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3453,7 +3415,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls.AsRegister<Register>(), &done); + __ Beq(temp, cls, &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3512,21 +3474,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bne(AT, cls.AsRegister<Register>(), &loop); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ Bnez(temp, slow_path->GetEntryLabel()); + __ Bne(AT, cls, &loop); break; } } @@ -3767,77 +3715,251 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); DCHECK(second.IsConstant()); - - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + int64_t imm = Int64FromConstant(second.GetConstant()); DCHECK(imm == 1 || imm == -1); - if (instruction->IsRem()) { - __ Move(out, ZERO); + if (instruction->GetResultType() == DataType::Type::kInt32) { + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + __ Subu(out, ZERO, dividend); + } else if (out != dividend) { + __ Move(out, dividend); + } + } } else { - if (imm == -1) { - __ Subu(out, ZERO, dividend); - } else if (out != dividend) { - __ Move(out, dividend); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + Register out_high = locations->Out().AsRegisterPairHigh<Register>(); + Register out_low = locations->Out().AsRegisterPairLow<Register>(); + Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); + + if (instruction->IsRem()) { + __ Move(out_high, ZERO); + __ Move(out_low, ZERO); + } else { + if (imm == -1) { + __ Subu(out_low, ZERO, in_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, ZERO, in_high); + __ Subu(out_high, out_high, AT); + } else { + __ Move(out_low, in_low); + __ Move(out_high, in_high); + } } } } void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); + const bool 
is_r2_or_newer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + const bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); DCHECK(second.IsConstant()); - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); + if (instruction->GetResultType() == DataType::Type::kInt32) { + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); - if (instruction->IsDiv()) { - if (ctz_imm == 1) { - // Fast path for division by +/-2, which is very common. - __ Srl(TMP, dividend, 31); + if (instruction->IsDiv()) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - } - __ Addu(out, dividend, TMP); - __ Sra(out, out, ctz_imm); - if (imm < 0) { - __ Subu(out, ZERO, out); + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + if (is_r2_or_newer) { + __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + } + __ Subu(out, out, TMP); + } } } else { - if (ctz_imm == 1) { - // Fast path for modulo +/-2, which is very common. 
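Editorial note on the int32 path of DivRemByPowerOfTwo above (a plain-C sketch of the identity being emitted; `n`, `k` and `imm` stand for the dividend, ctz_imm and the constant divisor, and an arithmetic right shift is assumed, matching Sra):

    // int32_t bias = (int32_t)((uint32_t)(n >> 31) >> (32 - k));  // (1 << k) - 1 if n < 0, else 0
    // int32_t quot = (n + bias) >> k;                             // now truncates toward zero
    // if (imm < 0) quot = -quot;                                  // negative divisor
    // int32_t rem  = (int32_t)(((uint32_t)(n + bias)) & ((1u << k) - 1u)) - bias;

Adding the bias turns the floor behaviour of the arithmetic shift into the round-toward-zero semantics that Java division and remainder require.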
- __ Sra(TMP, dividend, 31); - __ Subu(out, dividend, TMP); - __ Andi(out, out, 1); - __ Addu(out, out, TMP); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + Register out_high = locations->Out().AsRegisterPairHigh<Register>(); + Register out_low = locations->Out().AsRegisterPairLow<Register>(); + Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (ctz_imm < 32) { + if (ctz_imm == 1) { + __ Srl(AT, in_high, 31); + } else { + __ Sra(AT, in_high, 31); + __ Srl(AT, AT, 32 - ctz_imm); + } + __ Addu(AT, AT, in_low); + __ Sltu(TMP, AT, in_low); + __ Addu(out_high, in_high, TMP); + __ Srl(out_low, AT, ctz_imm); + if (is_r2_or_newer) { + __ Ins(out_low, out_high, 32 - ctz_imm, ctz_imm); + __ Sra(out_high, out_high, ctz_imm); + } else { + __ Sll(AT, out_high, 32 - ctz_imm); + __ Sra(out_high, out_high, ctz_imm); + __ Or(out_low, out_low, AT); + } + if (imm < 0) { + __ Subu(out_low, ZERO, out_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, ZERO, out_high); + __ Subu(out_high, out_high, AT); + } + } else if (ctz_imm == 32) { + __ Sra(AT, in_high, 31); + __ Addu(AT, AT, in_low); + __ Sltu(AT, AT, in_low); + __ Addu(out_low, in_high, AT); + if (imm < 0) { + __ Srl(TMP, out_low, 31); + __ Subu(out_low, ZERO, out_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, TMP, AT); + } else { + __ Sra(out_high, out_low, 31); + } + } else if (ctz_imm < 63) { + __ Sra(AT, in_high, 31); + __ Srl(TMP, AT, 64 - ctz_imm); + __ Addu(AT, AT, in_low); + __ Sltu(AT, AT, in_low); + __ Addu(out_low, in_high, AT); + __ Addu(out_low, out_low, TMP); + __ Sra(out_low, out_low, ctz_imm - 32); + if (imm < 0) { + __ Subu(out_low, ZERO, out_low); + } + __ Sra(out_high, out_low, 31); + } else { + DCHECK_LT(imm, 0); + if (is_r6) { + __ Aui(AT, in_high, 0x8000); + } else { + __ Lui(AT, 0x8000); + __ Xor(AT, AT, in_high); + } + __ Or(AT, AT, in_low); + __ Sltiu(out_low, AT, 1); + __ Move(out_high, ZERO); + } } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - __ Addu(out, dividend, TMP); - if (IsUint<16>(abs_imm - 1)) { - __ Andi(out, out, abs_imm - 1); + if ((ctz_imm == 1) && !is_r6) { + __ Andi(AT, in_low, 1); + __ Sll(TMP, in_low, 31); + __ And(TMP, in_high, TMP); + __ Sra(out_high, TMP, 31); + __ Or(out_low, out_high, AT); + } else if (ctz_imm < 32) { + __ Sra(AT, in_high, 31); + if (ctz_imm <= 16) { + __ Andi(out_low, in_low, abs_imm - 1); + } else if (is_r2_or_newer) { + __ Ext(out_low, in_low, 0, ctz_imm); + } else { + __ Sll(out_low, in_low, 32 - ctz_imm); + __ Srl(out_low, out_low, 32 - ctz_imm); + } + if (is_r6) { + __ Selnez(out_high, AT, out_low); + } else { + __ Movz(AT, ZERO, out_low); + __ Move(out_high, AT); + } + if (is_r2_or_newer) { + __ Ins(out_low, out_high, ctz_imm, 32 - ctz_imm); + } else { + __ Sll(AT, out_high, ctz_imm); + __ Or(out_low, out_low, AT); + } + } else if (ctz_imm == 32) { + __ Sra(AT, in_high, 31); + __ Move(out_low, in_low); + if (is_r6) { + __ Selnez(out_high, AT, out_low); + } else { + __ Movz(AT, ZERO, out_low); + __ Move(out_high, AT); + } + } else if (ctz_imm < 63) { + __ Sra(AT, in_high, 31); + __ Move(TMP, in_low); + if (ctz_imm - 32 <= 16) { + __ Andi(out_high, in_high, (1 << (ctz_imm - 32)) - 1); + } else if (is_r2_or_newer) { + __ Ext(out_high, in_high, 0, ctz_imm - 
32); + } else { + __ Sll(out_high, in_high, 64 - ctz_imm); + __ Srl(out_high, out_high, 64 - ctz_imm); + } + __ Move(out_low, TMP); + __ Or(TMP, TMP, out_high); + if (is_r6) { + __ Selnez(AT, AT, TMP); + } else { + __ Movz(AT, ZERO, TMP); + } + if (is_r2_or_newer) { + __ Ins(out_high, AT, ctz_imm - 32, 64 - ctz_imm); + } else { + __ Sll(AT, AT, ctz_imm - 32); + __ Or(out_high, out_high, AT); + } } else { - if (codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()) { - __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); + if (is_r6) { + __ Aui(AT, in_high, 0x8000); } else { - __ Sll(out, out, 32 - ctz_imm); - __ Srl(out, out, 32 - ctz_imm); + __ Lui(AT, 0x8000); + __ Xor(AT, AT, in_high); } + __ Or(AT, AT, in_low); + __ Sltiu(AT, AT, 1); + __ Sll(AT, AT, 31); + __ Move(out_low, in_low); + __ Xor(out_high, in_high, AT); } - __ Subu(out, out, TMP); } } } @@ -3935,7 +4057,16 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst void LocationsBuilderMIPS::VisitDiv(HDiv* div) { DataType::Type type = div->GetResultType(); - LocationSummary::CallKind call_kind = (type == DataType::Type::kInt64) + bool call_long_div = false; + if (type == DataType::Type::kInt64) { + if (div->InputAt(1)->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(div->InputAt(1)->AsConstant()); + call_long_div = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); + } else { + call_long_div = true; + } + } + LocationSummary::CallKind call_kind = call_long_div ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; @@ -3949,12 +4080,18 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { break; case DataType::Type::kInt64: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); + if (call_long_div) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); + } break; } @@ -3979,8 +4116,20 @@ void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { GenerateDivRemIntegral(instruction); break; case DataType::Type::kInt64: { - codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); + if (locations->InAt(1).IsConstant()) { + int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else { + DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); + DivRemByPowerOfTwo(instruction); + } + } else { + codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); + } break; } case DataType::Type::kFloat32: @@ -4094,6 +4243,12 @@ void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* su HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lw(AT, SP, kCurrentMethodStackOffset); + __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -6366,6 +6521,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, case DataType::Type::kFloat64: load_type = kLoadDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -6519,6 +6676,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -7247,18 +7406,17 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -7267,13 +7425,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. 
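Editorial note on the HandleGoto hunk above (an illustrative C-level view, not part of the patch; `sp` is just shorthand for the stack pointer): when CountHotnessInCompiledCode() is enabled, each loop back edge reloads the current ArtMethod from the stack and bumps its hotness counter with a plain, non-atomic halfword read-modify-write, mirroring the increment emitted in GenerateFrameEntry() on method entry:

    // ArtMethod* method = *reinterpret_cast<ArtMethod**>(sp + kCurrentMethodStackOffset);
    // uint16_t* hotness = reinterpret_cast<uint16_t*>(
    //     reinterpret_cast<uint8_t*>(method) + ArtMethod::HotnessCountOffset().Int32Value());
    // *hotness = static_cast<uint16_t>(*hotness + 1);   // Lhu / Addiu / Sh above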
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7285,7 +7437,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Location cls = locations->InAt(1); + Register cls = locations->InAt(1).AsRegister<Register>(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7307,27 +7459,31 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls.AsRegister<Register>()); + __ Xor(out, out, cls); __ Sltiu(out, out, 1); break; } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; @@ -7337,32 +7493,34 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls.AsRegister<Register>(), &loop); + __ Bne(out, cls, &loop); __ LoadConst32(out, 1); break; } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls.AsRegister<Register>(), &success); + __ Beq(out, cls, &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -7372,23 +7530,25 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. MipsLabel success; - __ Beq(out, cls.AsRegister<Register>(), &success); + __ Beq(out, cls, &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -7413,7 +7573,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bne(out, cls, slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7445,20 +7605,6 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { __ B(slow_path->GetEntryLabel()); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ Sltiu(out, out, 1); - break; - } } __ Bind(&done); @@ -7505,10 +7651,6 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - // Set the hidden argument. - __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); @@ -7533,6 +7675,9 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); + // Set the hidden argument. 
+ __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), + invoke->GetDexMethodIndex()); // T9(); __ Jalr(T9); __ NopIfNoReordering(); @@ -7695,9 +7840,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* info_high = NewBootImageMethodPatch(invoke->GetTargetMethod()); PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); @@ -7706,6 +7851,15 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = invoke->GetDispatchInfo().method_load_data; + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -7913,9 +8067,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -7939,9 +8093,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8108,9 +8262,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - 
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8133,9 +8287,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8561,9 +8715,16 @@ void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) void LocationsBuilderMIPS::VisitRem(HRem* rem) { DataType::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = (type == DataType::Type::kInt32) - ? LocationSummary::kNoCall - : LocationSummary::kCallOnMainOnly; + bool call_rem; + if ((type == DataType::Type::kInt64) && rem->InputAt(1)->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rem->InputAt(1)->AsConstant()); + call_rem = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); + } else { + call_rem = (type != DataType::Type::kInt32); + } + LocationSummary::CallKind call_kind = call_rem + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { @@ -8574,12 +8735,18 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { break; case DataType::Type::kInt64: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); + if (call_rem) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); + } break; } @@ -8599,14 +8766,27 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); switch (type) { case DataType::Type::kInt32: GenerateDivRemIntegral(instruction); break; case DataType::Type::kInt64: { - codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); + if (locations->InAt(1).IsConstant()) { + int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else { + DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); + DivRemByPowerOfTwo(instruction); + } + } else { + codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); + } break; } case DataType::Type::kFloat32: { @@ -8624,6 +8804,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. 
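As a concrete model of the idiom described in the comment above (SelectModel is a hypothetical name; the generated code emits SELEQZ/SELNEZ plus OR on the actual registers):

#include <cstdint>

// R6 has no full conditional move, only "select-or-zero":
//   SELEQZ out, rs, c  =>  out = (c == 0) ? rs : 0
//   SELNEZ out, rt, c  =>  out = (c != 0) ? rt : 0
// Exactly one of the two results is zero, so OR-ing them yields "c == 0 ? rs : rt".
uint32_t SelectModel(uint32_t c, uint32_t rs, uint32_t rt) {
  uint32_t if_zero     = (c == 0) ? rs : 0;  // SELEQZ
  uint32_t if_not_zero = (c != 0) ? rt : 0;  // SELNEZ
  return if_zero | if_not_zero;              // OR
}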
+ if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out 
= locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. 
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index ffeb3b00a7..2be8e2ead9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,7 +237,6 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); @@ -247,6 +246,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMax(HBinaryOperation*, bool is_min); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -577,8 +581,9 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings. The only difference is the interpretation of the offset_or_index. 
+ // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -593,21 +598,14 @@ class CodeGeneratorMIPS : public CodeGenerator { // ... // sw r2, low(r1) // patch // b back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<MipsLabel> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<MipsLabel>(dex_file, off_or_idx), pc_rel_label(), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. - uint32_t offset_or_index; - // Label for the instruction to patch. - MipsLabel label; // Label for the instruction corresponding to PC+0. Not bound or used in low half patches. // Not bound in high half patches on R2 when using HMipsComputeBaseMethodAddress. // Bound in high half patches on R2 when using the NAL instruction instead of @@ -622,19 +620,21 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); @@ -676,7 +676,7 @@ class CodeGeneratorMIPS : public CodeGenerator { using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); @@ -696,16 +696,16 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for 
non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3ae8a30754..d08a0658ec 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -55,8 +55,10 @@ Location Mips64ReturnLocation(DataType::Type return_type) { case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: case DataType::Type::kReference: + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterLocation(V0); @@ -350,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -960,11 +962,11 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1079,6 +1081,12 @@ static dwarf::Reg DWARFReg(FpuRegister reg) { void CodeGeneratorMIPS64::GenerateFrameEntry() { __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lhu(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + } + bool do_overflow_check = 
FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod(); @@ -1491,39 +1499,48 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); const PcRelativePatchInfo& info_high = info.patch_info_high ? *info.patch_info_high : info; uint32_t pc_rel_offset = __ GetLabelLocation(&info_high.label); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. + return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1534,54 +1551,58 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const 
PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1768,41 +1789,13 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ LoadConst32(AT, shifted_initialized_value); - __ Bltuc(TMP, AT, slow_path->GetEntryLabel()); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnezc(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
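For reference, the new Sltiu/Bnezc pair and the old LoadConst32/Bltuc pair compute the same predicate; a C++ model of the check (helper name hypothetical):

#include <cstdint>

// ClassStatus values are ordered and the status byte keeps the status in its upper bits, so
// "not yet initialized" reduces to a single unsigned compare against the shifted constant.
bool NeedsInitializationSlowPath(uint8_t status_byte, uint8_t shifted_initialized_value) {
  return status_byte < shifted_initialized_value;  // Sltiu + Bnezc (previously LoadConst32 + Bltuc)
}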
__ Sync(0); __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - GpuRegister temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if (mask_bits == 16u) { - // Load only the bitstring part of the status word. - __ LoadFromOffset( - kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); - // Compare the bitstring bits using XOR. - if (IsUint<16>(path_to_root)) { - __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); - } else { - __ LoadConst32(TMP, path_to_root); - __ Xor(temp, temp, TMP); - } - // Shift out bits that do not contribute to the comparison. - __ Sll(temp, temp, 32 - mask_bits); - } -} - void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2432,6 +2425,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -2735,6 +2730,8 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -2854,38 +2851,12 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
- break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - case TypeCheckKind::kBitstringCheck: - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2894,7 +2865,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location cls = locations->InAt(1); + GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2910,18 +2881,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); Mips64Label done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, is_type_check_slow_path_fatal); @@ -2944,7 +2904,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bnec(temp, cls, slow_path->GetEntryLabel()); break; } @@ -2970,7 +2930,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); + __ Bnec(temp, cls, &loop); break; } @@ -2985,7 +2945,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. 
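The hierarchy walk here (and in the kAbstractClassCheck case above) follows super_class_ links until a match or null; a compact C++ model, with a stand-in struct instead of mirror::Class:

struct ClassModel {               // stand-in for mirror::Class, for illustration only
  const ClassModel* super_class;  // the super_class_ field loaded by the loop
};

// True if `klass` is `cls` or inherits from it; the generated loop instead branches to `done`
// on a match and to the slow path if the walk reaches null.
bool IsSameOrSubClassOf(const ClassModel* klass, const ClassModel* cls) {
  for (const ClassModel* k = klass; k != nullptr; k = k->super_class) {
    if (k == cls) {
      return true;
    }
  }
  return false;
}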
Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); + __ Beqc(temp, cls, &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3008,7 +2968,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); + __ Beqc(temp, cls, &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3067,21 +3027,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); - break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - maybe_temp2_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ Bnezc(temp, slow_path->GetEntryLabel()); + __ Bnec(AT, cls, &loop); break; } } @@ -3622,6 +3568,12 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Ld(AT, SP, kCurrentMethodStackOffset); + __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -4844,6 +4796,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: load_type = kLoadUnsignedWord; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -4937,6 +4891,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -5564,18 +5520,17 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -5584,13 +5539,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - } + locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5602,7 +5551,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - Location cls = locations->InAt(1); + GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5624,27 +5573,31 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls.AsRegister<GpuRegister>()); + __ Xor(out, out, cls); __ Sltiu(out, out, 1); break; } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Mips64Label loop; @@ -5654,32 +5607,34 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqzc(out, &done); - __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); + __ Bnec(out, cls, &loop); __ LoadConst32(out, 1); break; } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); + __ Beqc(out, cls, &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnezc(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ Bc(&done); @@ -5689,23 +5644,25 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. Mips64Label success; - __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); + __ Beqc(out, cls, &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqzc(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5730,7 +5687,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bnec(out, cls, slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5762,20 +5719,6 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { __ Bc(slow_path->GetEntryLabel()); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - maybe_temp_loc, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ Sltiu(out, out, 1); - break; - } } __ Bind(&done); @@ -5987,9 +5930,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + NewBootImageMethodPatch(invoke->GetTargetMethod()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; @@ -5999,6 +5942,15 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = invoke->GetDispatchInfo().method_load_data; + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 
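A minimal sketch of why the 32-bit load suffices (slot address formation is elided; LoadFromRelRoSlot is a hypothetical helper, not ART API):

#include <cstdint>

// Each .data.bimg.rel.ro slot is a 32-bit absolute address of a boot image entry; 32 bits are
// enough because the boot image is mapped below 4 GiB. The generated code forms the slot's
// address PC-relatively (the placeholder pair patched by the linker) and then issues one Lwu.
template <typename T>
T* LoadFromRelRoSlot(const uint32_t* slot) {
  return reinterpret_cast<T*>(static_cast<uintptr_t>(*slot));  // zero-extend, as Lwu does
}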
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -6169,9 +6121,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); break; @@ -6189,9 +6141,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); // Extract the reference from the slot data, i.e. clear the hash bits. 
@@ -6305,9 +6257,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); return; @@ -6324,9 +6276,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); return; @@ -6738,6 +6690,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. 
After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 87d5a9c15a..5d40307cc7 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,7 +233,6 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); - void 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -243,6 +242,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -556,9 +559,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays, - // boot image strings and method calls. The only difference is the interpretation of - // the offset_or_index. + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -572,20 +575,13 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // ... // sw r2, low(r1) // patch // bc back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<Mips64Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<Mips64Label>(dex_file, off_or_idx), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type/method index. - uint32_t offset_or_index; - // Label for the instruction to patch. - Mips64Label label; // Pointer to the info for the high half patch or nullptr if this is the high half patch info. 
const PcRelativePatchInfo* patch_info_high; @@ -594,19 +590,21 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); @@ -640,7 +638,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); @@ -663,16 +661,16 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. 
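The "two patches per offset" arrangement described in the PcRelativePatchInfo comment above follows from the usual MIPS immediate rules: the instruction consuming the low half sign-extends its 16-bit field, so the high half has to be biased by 0x8000 whenever bit 15 of the offset is set. A self-contained check of that split (a general illustration of the hi/lo arithmetic, not the actual linker patching code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Split a 32-bit offset into halves so that (hi << 16) + sign_extend16(lo) == offset.
    static void SplitOffset(uint32_t offset, uint16_t* hi, uint16_t* lo) {
      *lo = static_cast<uint16_t>(offset);
      *hi = static_cast<uint16_t>((offset + 0x8000u) >> 16);  // Bias for the sign-extended low half.
    }

    int main() {
      for (uint32_t offset : {0x00000000u, 0x00007FFFu, 0x00008000u, 0x12348765u, 0xFFFFFFFFu}) {
        uint16_t hi = 0;
        uint16_t lo = 0;
        SplitOffset(offset, &hi, &lo);
        uint32_t lo_sext = static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(lo)));
        assert((static_cast<uint32_t>(hi) << 16) + lo_sext == offset);
      }
      return 0;
    }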
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 152a59c208..174efdf115 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -606,22 +606,20 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: @@ -656,22 +654,20 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index cc470ddb2e..7c3155ab73 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -431,13 +431,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vmin(DataTypeValue::S16, dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmin(DataTypeValue::U32, dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmin(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmin(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -471,13 +471,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vmax(DataTypeValue::S16, dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmax(DataTypeValue::U32, dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - 
__ Vmax(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmax(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index 3cf150a6b8..ed9de96496 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -613,32 +613,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Min_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - __ Min_sW(dst, lhs, rhs); - } + __ Min_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -673,32 +671,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Max_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 2d69533f21..9ea55ec8d7 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -612,32 +612,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Min_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - __ Min_sW(dst, lhs, rhs); - } + __ Min_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -672,32 +670,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Max_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 7b4b85d2fe..f2ffccc887 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -640,23 +640,21 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pminsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -691,23 +689,21 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmaxsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 107030e6c2..e2b0485f89 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -623,23 +623,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pminsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
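Replacing the IsUnsigned() flag with explicit kUint32/kUint64 vector types matters because the same lane bit pattern orders differently under signed and unsigned comparison, which is exactly the difference between pminsd and pminud above. A scalar sketch of the distinction (illustrative, not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t bits = 0xFFFFFFFFu;  // 4294967295 as uint32_t, -1 when reinterpreted as int32_t.

      // Unsigned lane min (pminud): 1 is smaller than 0xFFFFFFFF.
      uint32_t umin = (bits < 1u) ? bits : 1u;
      assert(umin == 1u);

      // Signed lane min (pminsd): -1 is smaller than 1.
      int32_t s = static_cast<int32_t>(bits);
      int32_t smin = (s < 1) ? s : 1;
      assert(smin == -1);
      return 0;
    }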
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -674,23 +672,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmaxsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e85f9001b1..528930a503 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -1028,7 +1031,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1061,6 +1064,11 @@ void CodeGeneratorX86::GenerateFrameEntry() { IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); @@ -1129,9 +1137,11 @@ Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: return Location::RegisterLocation(EAX); + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterPairLocation(EAX, EDX); @@ -1201,6 
+1211,8 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -1357,6 +1369,12 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ pushl(EAX); + __ movl(EAX, Address(ESP, kX86WordSize)); + __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); + __ popl(EAX); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3787,6 +3805,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. 
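On x86-32 each long operand lives in a register pair, so the comparison above is assembled from a cmpl of the low words followed by an sbbl of the high words; after the borrow-propagating subtract the signed condition codes come out as if the full 64-bit subtraction had been performed. The same ordering identity written out in plain C++ (a sketch of the idea, not the generated code; assumes arithmetic right shift of negative values, as on the hosts and targets discussed):

    #include <cassert>
    #include <cstdint>

    // 64-bit signed "a < b" assembled from 32-bit halves: compare the high words as
    // signed and fall back to an unsigned comparison of the low words on a tie.
    static bool SignedLess64(int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo) {
      return (a_hi < b_hi) || (a_hi == b_hi && a_lo < b_lo);
    }

    int main() {
      const int64_t samples[] = {0, 1, -1, 0x7FFFFFFFLL, -0x80000000LL,
                                 0x100000000LL, INT64_MIN, INT64_MAX};
      for (int64_t a : samples) {
        for (int64_t b : samples) {
          bool split = SignedLess64(static_cast<int32_t>(a >> 32), static_cast<uint32_t>(a),
                                    static_cast<int32_t>(b >> 32), static_cast<uint32_t>(b));
          assert(split == (a < b));
        }
      }
      return 0;
    }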
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
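The orps/orpd and andps/andpd steps above cover the case where the ucomis comparison finds the operands equal: for equal nonzero values OR/AND of identical bit patterns changes nothing, and for the +0.0/-0.0 pair, which differ only in the sign bit, OR yields -0.0 (the Java minimum) and AND yields +0.0 (the Java maximum). A quick check of that bit-level fact (sketch only):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static uint64_t Bits(double d) { uint64_t u; std::memcpy(&u, &d, sizeof(u)); return u; }
    static double FromBits(uint64_t u) { double d; std::memcpy(&d, &u, sizeof(d)); return d; }

    int main() {
      double pos_zero = 0.0;
      double neg_zero = -0.0;
      // min(+0.0, -0.0) must be -0.0: OR keeps the sign bit if either operand has it set.
      assert(std::signbit(FromBits(Bits(pos_zero) | Bits(neg_zero))));
      // max(+0.0, -0.0) must be +0.0: AND clears the sign bit unless both operands have it set.
      assert(!std::signbit(FromBits(Bits(pos_zero) & Bits(neg_zero))));
      return 0;
    }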
+ __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. + break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. 
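Both the earlier MIPS64 sequence (Sra/Xor/Subu) and the x86 sequence here (cdq/xorl/subl, plus the sarl-based pair version for long) compute |x| branch-free from a sign mask: sign = x >> 31 is all ones for negative x and zero otherwise, and |x| = (x ^ sign) - sign. A compact sketch, assuming arithmetic right shift of signed values (true on the targets discussed); as with Java's Math.abs, the most negative value maps to itself:

    #include <cassert>
    #include <cstdint>

    static int32_t Abs32(int32_t x) {
      uint32_t sign = static_cast<uint32_t>(x >> 31);  // 0 or 0xFFFFFFFF (arithmetic shift assumed).
      // For x < 0: (x ^ sign) == ~x and subtracting -1 adds 1, giving -x. For x >= 0: x unchanged.
      return static_cast<int32_t>((static_cast<uint32_t>(x) ^ sign) - sign);
    }

    int main() {
      assert(Abs32(5) == 5);
      assert(Abs32(-5) == 5);
      assert(Abs32(0) == 0);
      assert(Abs32(INT32_MIN) == INT32_MIN);  // Wraps, matching Java's Math.abs.
      return 0;
    }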
+ __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4513,20 +4826,26 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); - // Bind a new fixup label at the end of the "movl" insn. 
- __ Bind(NewMethodBssEntryPatch( + RecordBootImageRelRoPatch( invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + invoke->GetDispatchInfo().method_load_data); + break; + } + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -4583,56 +4902,62 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { +void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back( + method_address, /* target_dex_file */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + +void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); - boot_image_method_patches_.emplace_back(address, - *invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().index); + boot_image_method_patches_.emplace_back( + method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86::NewMethodBssEntryPatch( - HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method) { +void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); // Add the patch entry and bind its label at the end of the instruction. 
- method_bss_entry_patches_.emplace_back(method_address, - *target_method.dex_file, - target_method.index); - return &method_bss_entry_patches_.back().label; + method_bss_entry_patches_.emplace_back( + method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); } -void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); - boot_image_type_patches_.emplace_back(address, - load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) { + HX86ComputeBaseMethodAddress* method_address = + load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_type_patches_.emplace_back( + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); type_bss_entry_patches_.emplace_back( - address, load_class->GetDexFile(), load_class->GetTypeIndex().index_); + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { + HX86ComputeBaseMethodAddress* method_address = + load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_string_patches_.emplace_back( + method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_bss_entry_patches_.emplace_back( - address, load_string->GetDexFile(), load_string->GetStringIndex().index_); + method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } @@ -4646,11 +4971,21 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( ArenaVector<linker::LinkerPatch>* linker_patches) { for (const X86PcRelativePatchInfo& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(Factory( - literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index)); + linker_patches->push_back(Factory(literal_offset, + info.target_dex_file, + GetMethodAddressOffset(info.method_address), + info.offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
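A DataBimgRelRoPatch carries only a boot image offset: the compiler leaves that offset in a 32-bit .data.bimg.rel.ro slot, the loader turns each slot into an absolute address by adding the boot image base, and compiled code then reads the slot with an ordinary load. A toy model of that relocation step (the names and layout below are invented for illustration and are not the runtime's):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Hypothetical model of a .data.bimg.rel.ro-style table: the compiler writes
    // 32-bit offsets from the boot image start, the loader rewrites them to
    // absolute addresses once the boot image base is known, then the section is
    // mapped read-only. Compiled code simply loads a slot to obtain the address.
    struct RelRoTable {
      std::vector<uint32_t> slots;

      void Relocate(uint32_t boot_image_base) {
        for (uint32_t& slot : slots) {
          slot += boot_image_base;  // Offset -> absolute address (assumes the image fits below 4GiB).
        }
      }
    };

    int main() {
      RelRoTable table{{0x1000u, 0x2040u, 0x80000u}};
      table.Relocate(/* boot_image_base */ 0x70000000u);
      assert(table.slots[0] == 0x70001000u);
      return 0;
    }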
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4658,7 +4993,7 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4667,13 +5002,14 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4833,6 +5169,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); @@ -5006,6 +5344,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -5309,6 +5649,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -5560,6 +5902,8 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -6095,7 +6439,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, Handle<mirror::Class> handle) { ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. 
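The New*Patch/Record*Patch helpers above share one pattern: emit the instruction with a dummy 32-bit payload (kDummy32BitOffset), bind a Label at a known distance from that payload, and later rewrite those four bytes once the real value (a root table index, a .bss or .data.bimg.rel.ro location, and so on) is known; EmitPcRelativeLinkerPatches recovers the literal position from the bound label for exactly that purpose. A stripped-down model of the record-then-patch step (the record layout, offsets and sample encoding are invented; assumes a little-endian host):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical patch record: position just past an instruction whose last
    // four bytes are a placeholder literal (the dummy 32-bit slot).
    struct PatchRecord {
      size_t end_position;
      uint32_t value;
    };

    static void ApplyPatches(std::vector<uint8_t>* code, const std::vector<PatchRecord>& patches) {
      for (const PatchRecord& p : patches) {
        size_t literal_offset = p.end_position - sizeof(uint32_t);  // Literal occupies the last 4 bytes.
        std::memcpy(code->data() + literal_offset, &p.value, sizeof(uint32_t));
      }
    }

    int main() {
      // "Emit" movl eax, [ebx + imm32] with a zero placeholder and record its end position.
      std::vector<uint8_t> code = {0x8B, 0x83, 0x00, 0x00, 0x00, 0x00};
      std::vector<PatchRecord> patches = {{code.size(), 0x12345678u}};
      ApplyPatches(&code, patches);
      assert(code[2] == 0x78 && code[5] == 0x12);  // imm32 written little-endian in place.
      return 0;
    }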
- jit_class_patches_.emplace_back(dex_file, type_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -6137,7 +6481,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -6152,7 +6496,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); // Extract the reference from the slot data, i.e. clear the hash bits. int32_t masked_hash = ClassTable::TableSlot::MaskHash( ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); @@ -6234,27 +6578,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } -void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - Register temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if ((false) && mask_bits == 16u) { - // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 - // Compare the bitstring in memory. - __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ movl(temp, Address(temp, mirror::Class::StatusOffset())); - // Compare the bitstring bits using SUB. - __ subl(temp, Immediate(path_to_root)); - // Shift out bits that do not contribute to the comparison. - __ shll(temp, Immediate(32u - mask_bits)); - } -} - HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -6305,7 +6628,7 @@ Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. 
- jit_string_patches_.emplace_back(dex_file, string_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -6322,7 +6645,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBootImageAddress: { @@ -6336,7 +6659,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBssEntry: { @@ -6447,8 +6770,6 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -6457,13 +6778,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::Any()); - } + locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. @@ -6684,21 +6999,6 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - __ j(kNotEqual, &zero); - __ movl(out, Immediate(1)); - __ jmp(&done); - break; - } } if (zero.IsLinked()) { @@ -6725,10 +7025,6 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. 
locations->SetInAt(1, Location::RequiresRegister()); - } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6948,19 +7244,6 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); - break; - } } __ Bind(&done); @@ -7868,13 +8151,13 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - StringReference string_reference(&info.dex_file, dex::StringIndex(info.index)); + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - TypeReference type_reference(&info.dex_file, dex::TypeIndex(info.index)); + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 2d14d4cdc2..2dc34e859e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,7 +211,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -226,6 +225,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -415,12 +417,13 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, @@ -611,7 +614,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: struct X86PcRelativePatchInfo : PatchInfo<Label> { X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address, - const DexFile& target_dex_file, + const DexFile* target_dex_file, uint32_t target_index) : PatchInfo(target_dex_file, target_index), method_address(address) {} @@ -633,7 +636,7 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; @@ -642,7 +645,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; // String patch locations; type depends on configuration (intern table or boot image PIC). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; + ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; // String patch locations for kBssEntry. 
ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 9f8b1bb038..d5997245ef 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -993,17 +993,22 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( DCHECK(GetCompilerOptions().IsBootImage()); __ leal(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ movl(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootImageRelRoPatch(invoke->GetDispatchInfo().method_load_data); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - // Bind a new fixup label at the end of the "movl" insn. - __ Bind(NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -1061,38 +1066,44 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { - boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().index); +void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) { - // Add a patch entry and return the label. 
- method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.index); - return &method_bss_entry_patches_.back().label; +void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back( + invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); + __ Bind(&boot_image_method_patches_.back().label); } -void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { - boot_image_type_patches_.emplace_back(load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); +} + +void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) { + boot_image_type_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { - type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); + type_bss_entry_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { + boot_image_string_patches_.emplace_back( + &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); string_bss_entry_patches_.emplace_back( - load_string->GetDexFile(), load_string->GetStringIndex().index_); + &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } @@ -1107,10 +1118,18 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( for (const PatchInfo<Label>& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back( - Factory(literal_offset, &info.dex_file, info.label.Position(), info.index)); + Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index)); } } +linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. 
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1118,7 +1137,7 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -1127,13 +1146,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1231,7 +1251,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1268,6 +1288,12 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(CpuRegister(kMethodRegisterArgument), + ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes))); @@ -1459,6 +1485,11 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); + __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2262,7 +2293,9 @@ Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Ty case DataType::Type::kInt8: case 
DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterLocation(RAX); @@ -2331,6 +2364,8 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -3807,6 +3842,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? 
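The pseudo-code comment above spells out the Java Math.min/Math.max floating-point semantics that the instructions emitted just below must preserve: a NaN operand yields NaN, and when the inputs compare equal the sign bits are combined so that min(+0.0, -0.0) is -0.0 (the orpd step) while max(-0.0, +0.0) is +0.0 (the andpd step). A minimal standalone C++ sketch of those semantics, illustrative only and not part of the ART sources:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

double JavaMinDouble(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();  // Any NaN operand -> NaN result.
  }
  if (a == b) {  // Also true for +0.0 vs -0.0: resolve via the sign bits.
    std::uint64_t bits_a;
    std::uint64_t bits_b;
    std::memcpy(&bits_a, &a, sizeof(a));
    std::memcpy(&bits_b, &b, sizeof(b));
    std::uint64_t bits_r = bits_a | bits_b;  // OR keeps a set sign bit (like orpd); max would AND (like andpd).
    double r;
    std::memcpy(&r, &bits_r, sizeof(r));
    return r;
  }
  return (a < b) ? a : b;
}

int main() {
  assert(std::signbit(JavaMinDouble(0.0, -0.0)));        // min(+0.0, -0.0) == -0.0.
  assert(std::isnan(JavaMinDouble(1.0, std::nan(""))));  // NaN propagates.
  assert(JavaMinDouble(1.0, 2.0) == 1.0);
  return 0;
}
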
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. 
+ __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -4296,6 +4566,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); @@ -4459,6 +4731,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -4752,6 +5026,8 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -4991,6 +5267,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -5440,27 +5718,6 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. } -void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, - CpuRegister temp) { - uint32_t path_to_root = check->GetBitstringPathToRoot(); - uint32_t mask = check->GetBitstringMask(); - DCHECK(IsPowerOfTwo(mask + 1)); - size_t mask_bits = WhichPowerOf2(mask + 1); - - if ((false) && mask_bits == 16u) { - // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 - // Compare the bitstring in memory. - __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); - } else { - // /* uint32_t */ temp = temp->status_ - __ movl(temp, Address(temp, mirror::Class::StatusOffset())); - // Compare the bitstring bits using SUB. - __ subl(temp, Immediate(path_to_root)); - // Shift out bits that do not contribute to the comparison. - __ shll(temp, Immediate(32u - mask_bits)); - } -} - HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5527,7 +5784,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, Handle<mirror::Class> handle) { ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. 
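The integer VisitAbs lowering above avoids a branch by using a sign mask: mask = x >> 31 (arithmetic shift; the Int64 path is the same with a 63-bit shift), then abs = (x + mask) ^ mask. For negative x the mask is all ones, so this computes ~(x - 1) == -x; for non-negative x it is a no-op, and like Java's Math.abs it maps INT32_MIN to itself through two's-complement wraparound. A standalone C++ check of that identity, illustrative only and not ART code:

#include <cstdint>

constexpr std::int32_t AbsViaSignMask(std::int32_t x) {
  std::int32_t mask = x >> 31;  // 0 for non-negative x, -1 (all ones) for negative x.
  // Do the add/xor in unsigned arithmetic so the INT32_MIN case wraps instead of overflowing.
  return static_cast<std::int32_t>(
      (static_cast<std::uint32_t>(x) + static_cast<std::uint32_t>(mask)) ^
      static_cast<std::uint32_t>(mask));
}

static_assert(AbsViaSignMask(5) == 5, "non-negative values are unchanged");
static_assert(AbsViaSignMask(-5) == 5, "negative values are negated by the add/xor pair");
static_assert(AbsViaSignMask(INT32_MIN) == INT32_MIN, "wraps like Java Math.abs");
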
- jit_class_patches_.emplace_back(dex_file, type_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -5568,7 +5825,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; case HLoadClass::LoadKind::kBootImageAddress: { DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -5581,7 +5838,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); // Extract the reference from the slot data, i.e. clear the hash bits. int32_t masked_hash = ClassTable::TableSlot::MaskHash( ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); @@ -5692,7 +5949,7 @@ Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. - jit_string_patches_.emplace_back(dex_file, string_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -5708,7 +5965,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBootImageAddress: { @@ -5721,7 +5978,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBssEntry: { @@ -5833,8 +6090,6 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; - case TypeCheckKind::kBitstringCheck: - break; } LocationSummary* locations = @@ -5843,13 +6098,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); - } else { - locations->SetInAt(1, Location::Any()); - } + locations->SetInAt(1, Location::Any()); // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for @@ -6078,27 +6327,6 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - out_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, out); - if (zero.IsLinked()) { - __ j(kNotEqual, &zero); - __ movl(out, Immediate(1)); - __ jmp(&done); - } else { - __ setcc(kEqual, out); - // setcc only sets the low byte. - __ andl(out, Immediate(1)); - } - break; - } } if (zero.IsLinked()) { @@ -6125,10 +6353,6 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); - } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6315,7 +6539,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: { + case TypeCheckKind::kInterfaceCheck: // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6351,20 +6575,6 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. 
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; - } - - case TypeCheckKind::kBitstringCheck: { - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - GenerateBitstringTypeCheckCompare(instruction, temp); - __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); - break; - } } if (done.IsLinked()) { @@ -7205,13 +7415,13 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - StringReference string_reference(&info.dex_file, dex::StringIndex(info.index)); + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - TypeReference type_reference(&info.dex_file, dex::TypeIndex(info.index)); + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 97f8ec7ae0..5c8ed6cd8c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,7 +208,6 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); - void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -223,6 +222,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -411,11 +414,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageRelRoPatch(uint32_t boot_image_offset); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, @@ -605,7 +609,7 @@ class 
CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; @@ -614,7 +618,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; // String patch locations; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PatchInfo<Label>> string_patches_; + ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; // String patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index ba4040acad..a0fd5ffcb1 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -18,6 +18,7 @@ #include <memory> #include "base/macros.h" +#include "base/utils.h" #include "builder.h" #include "codegen_test_utils.h" #include "dex/dex_file.h" @@ -26,7 +27,6 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator_linear_scan.h" -#include "utils.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/mips/managed_register_mips.h" diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h index e389bad3ad..94807e8fc9 100644 --- a/compiler/optimizing/data_type-inl.h +++ b/compiler/optimizing/data_type-inl.h @@ -18,7 +18,7 @@ #define ART_COMPILER_OPTIMIZING_DATA_TYPE_INL_H_ #include "data_type.h" -#include "primitive.h" +#include "dex/primitive.h" namespace art { @@ -53,7 +53,9 @@ constexpr char DataType::TypeId(DataType::Type type) { case DataType::Type::kInt8: return 'b'; // Java byte (B). case DataType::Type::kUint16: return 'c'; // Java char (C). case DataType::Type::kInt16: return 's'; // Java short (S). + case DataType::Type::kUint32: return 'u'; // Picked 'u' for unsigned. case DataType::Type::kInt32: return 'i'; // Java int (I). + case DataType::Type::kUint64: return 'w'; // Picked 'w' for long unsigned. case DataType::Type::kInt64: return 'j'; // Java long (J). case DataType::Type::kFloat32: return 'f'; // Java float (F). case DataType::Type::kFloat64: return 'd'; // Java double (D). 
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc index 3c99a76c17..cb354f46cc 100644 --- a/compiler/optimizing/data_type.cc +++ b/compiler/optimizing/data_type.cc @@ -25,7 +25,9 @@ static const char* kTypeNames[] = { "Int8", "Uint16", "Int16", + "Uint32", "Int32", + "Uint64", "Int64", "Float32", "Float64", diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 548fe28cee..4a6c91459f 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -34,7 +34,9 @@ class DataType { kInt8, kUint16, kInt16, + kUint32, kInt32, + kUint64, kInt64, kFloat32, kFloat64, @@ -55,9 +57,11 @@ class DataType { case Type::kUint16: case Type::kInt16: return 1; + case Type::kUint32: case Type::kInt32: case Type::kFloat32: return 2; + case Type::kUint64: case Type::kInt64: case Type::kFloat64: return 3; @@ -80,9 +84,11 @@ class DataType { case Type::kUint16: case Type::kInt16: return 2; + case Type::kUint32: case Type::kInt32: case Type::kFloat32: return 4; + case Type::kUint64: case Type::kInt64: case Type::kFloat64: return 8; @@ -107,7 +113,9 @@ class DataType { case Type::kInt8: case Type::kUint16: case Type::kInt16: + case Type::kUint32: case Type::kInt32: + case Type::kUint64: case Type::kInt64: return true; default: @@ -120,11 +128,12 @@ class DataType { } static bool Is64BitType(Type type) { - return type == Type::kInt64 || type == Type::kFloat64; + return type == Type::kUint64 || type == Type::kInt64 || type == Type::kFloat64; } static bool IsUnsignedType(Type type) { - return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16; + return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16 || + type == Type::kUint32 || type == Type::kUint64; } // Return the general kind of `type`, fusing integer-like types as Type::kInt. 
@@ -133,10 +142,14 @@ class DataType { case Type::kBool: case Type::kUint8: case Type::kInt8: - case Type::kInt16: case Type::kUint16: + case Type::kInt16: + case Type::kUint32: case Type::kInt32: return Type::kInt32; + case Type::kUint64: + case Type::kInt64: + return Type::kInt64; default: return type; } @@ -154,8 +167,12 @@ class DataType { return std::numeric_limits<uint16_t>::min(); case Type::kInt16: return std::numeric_limits<int16_t>::min(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::min(); case Type::kInt32: return std::numeric_limits<int32_t>::min(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::min(); case Type::kInt64: return std::numeric_limits<int64_t>::min(); default: @@ -176,8 +193,12 @@ class DataType { return std::numeric_limits<uint16_t>::max(); case Type::kInt16: return std::numeric_limits<int16_t>::max(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::max(); case Type::kInt32: return std::numeric_limits<int32_t>::max(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::max(); case Type::kInt64: return std::numeric_limits<int64_t>::max(); default: diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc index ca137b7c7c..8fea22bce8 100644 --- a/compiler/optimizing/data_type_test.cc +++ b/compiler/optimizing/data_type_test.cc @@ -20,7 +20,7 @@ #include "base/array_ref.h" #include "base/macros.h" -#include "primitive.h" +#include "dex/primitive.h" namespace art { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cca1055ac8..9fa0f72e80 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -146,6 +146,65 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi } } +static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) { + // Test for an if as last statement. + if (!block->EndsWithIf()) { + return false; + } + HIf* ifs = block->GetLastInstruction()->AsIf(); + // Find either: + // if obj == null + // throws + // else + // not_throws + // or: + // if obj != null + // not_throws + // else + // throws + HInstruction* cond = ifs->InputAt(0); + HBasicBlock* not_throws = nullptr; + if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) { + not_throws = ifs->IfFalseSuccessor(); + } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) { + not_throws = ifs->IfTrueSuccessor(); + } else { + return false; + } + DCHECK(cond->IsEqual() || cond->IsNotEqual()); + HInstruction* obj = cond->InputAt(1); + if (obj->IsNullConstant()) { + obj = cond->InputAt(0); + } else if (!cond->InputAt(0)->IsNullConstant()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. 
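RemoveNonNullControlDependences above pattern-matches a block ending in `if (obj == null)` whose one successor always throws, and the loop that follows the comment rewires dominated HNullCheck users of `obj` to a non-null HBoundType inserted at the head of the non-throwing successor. The source-level shape it targets, sketched in plain C++ for illustration only (the real input is dex bytecode, and `AlwaysThrows`/`Use` are made-up names):

#include <stdexcept>

struct Obj { int field; };

[[noreturn]] void AlwaysThrows() { throw std::runtime_error("unexpected null"); }

int Use(Obj* obj) {
  if (obj == nullptr) {
    AlwaysThrows();  // This successor never returns; SimplifyAlwaysThrows redirects it to the exit block.
  }
  // Everything below is dominated by the "obj != nullptr" edge, so an implicit
  // null check on this dereference is redundant; the pass records that fact with
  // an HBoundType whose can_be_null flag is false and rewires the HNullCheck to it.
  return obj->field;
}

int main() {
  Obj o{42};
  return Use(&o) == 42 ? 0 : 1;
}
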
+ HBoundType* bound = nullptr; + const HUseList<HInstruction*>& uses = obj->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + ++it; // increment before possibly replacing + if (user->IsNullCheck()) { + HBasicBlock* user_block = user->GetBlock(); + if (user_block != block && + user_block != throws && + block->Dominates(user_block)) { + if (bound == nullptr) { + ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); + bound->SetUpperBound(ti, /*can_be_null*/ false); + bound->SetReferenceTypeInfo(ti); + bound->SetCanBeNull(false); + not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction()); + } + user->ReplaceWith(bound); + user_block->RemoveInstruction(user); + } + } + } + return bound != nullptr; +} + // Simplify the pattern: // // B1 @@ -203,6 +262,11 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { block->ReplaceSuccessor(succ, exit); rerun_dominance_and_loop_analysis = true; MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. + if (RemoveNonNullControlDependences(pred, block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); + } } } } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index fbcbe3608e..c88baa8610 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,11 +25,6 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -#include "handle.h" -#include "mirror/class.h" -#include "obj_ptr-inl.h" -#include "scoped_thread_state_change-inl.h" -#include "subtype_check.h" namespace art { @@ -553,83 +548,28 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, - size_t input_pos, - bool check_value, - uint32_t expected_value, - const char* name) { - if (!check->InputAt(input_pos)->IsIntConstant()) { - AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", - check->DebugName(), - check->GetId(), - input_pos, - name, - check->InputAt(2)->DebugName(), - check->InputAt(2)->GetId())); - } else if (check_value) { - uint32_t actual_value = - static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); - if (actual_value != expected_value) { - AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", - check->DebugName(), - check->GetId(), - name, - actual_value, - expected_value)); - } - } -} - -void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { +void GraphChecker::VisitCheckCast(HCheckCast* check) { VisitInstruction(check); HInstruction* input = check->InputAt(1); - if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { - if (!input->IsNullConstant()) { - AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", - check->DebugName(), - check->GetId(), - input->DebugName(), - input->GetId())); - } - bool check_values = false; - BitString::StorageType expected_path_to_root = 0u; - BitString::StorageType expected_mask = 0u; - { - ScopedObjectAccess soa(Thread::Current()); - ObjPtr<mirror::Class> klass = check->GetClass().Get(); - MutexLock 
subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); - SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); - if (state == SubtypeCheckInfo::kAssigned) { - expected_path_to_root = - SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); - expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); - check_values = true; - } else { - AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", - check->DebugName(), - check->GetId())); - } - } - CheckTypeCheckBitstringInput( - check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root"); - CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask"); - } else { - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", - check->DebugName(), - check->GetId(), - input->DebugName(), - input->GetId())); - } + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - HandleTypeCheckInstruction(check); -} - void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - HandleTypeCheckInstruction(instruction); + VisitInstruction(instruction); + HInstruction* input = instruction->InputAt(1); + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", + instruction->DebugName(), + instruction->GetId(), + input->DebugName(), + input->GetId())); + } } void GraphChecker::HandleLoop(HBasicBlock* loop_header) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index dbedc40518..0f0b49d240 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -71,12 +71,6 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, - size_t input_pos, - bool check_value, - uint32_t expected_value, - const char* name); - void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 55191214a6..5ff31cead5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -27,6 +27,7 @@ #include "code_generator.h" #include "data_type-inl.h" #include "dead_code_elimination.h" +#include "dex/descriptors_names.h" #include "disassembler.h" #include "inliner.h" #include "licm.h" @@ -389,23 +390,16 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { - StartAttributeStream("check_kind") << check->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << check->MustDoNullCheck() << std::noboolalpha; - if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { - StartAttributeStream("path_to_root") << std::hex - << "0x" << check->GetBitstringPathToRoot() << std::dec; - StartAttributeStream("mask") << std::hex << 
"0x" << check->GetBitstringMask() << std::dec; - } - } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - HandleTypeCheckInstruction(check_cast); + StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + StartAttributeStream("must_do_null_check") << std::boolalpha + << check_cast->MustDoNullCheck() << std::noboolalpha; } void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - HandleTypeCheckInstruction(instance_of); + StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); + StartAttributeStream("must_do_null_check") << std::boolalpha + << instance_of->MustDoNullCheck() << std::noboolalpha; } void VisitArrayLength(HArrayLength* array_length) OVERRIDE { @@ -452,6 +446,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { ? GetGraph()->GetDexFile().PrettyMethod(invoke->GetDexMethodIndex(), kWithSignature) : method->PrettyMethod(kWithSignature); StartAttributeStream("method_name") << method_name; + StartAttributeStream("always_throws") << std::boolalpha + << invoke->AlwaysThrows() + << std::noboolalpha; } void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { @@ -540,20 +537,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { VisitVecBinaryOperation(hadd); - StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha; StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } - void VisitVecMin(HVecMin* min) OVERRIDE { - VisitVecBinaryOperation(min); - StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; - } - - void VisitVecMax(HVecMax* max) OVERRIDE { - VisitVecBinaryOperation(max); - StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; - } - void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { VisitVecOperation(instruction); StartAttributeStream("kind") << instruction->GetOpKind(); @@ -655,32 +641,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } - // For the builder and the inliner, we want to add extra information on HInstructions - // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference || - instruction->IsInstanceOf() || - instruction->IsCheckCast())) { - ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) - ? instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo() - : instruction->IsInstanceOf() - ? instruction->AsInstanceOf()->GetTargetClassRTI() - : instruction->AsCheckCast()->GetTargetClassRTI(); + && (instruction->GetType() == DataType::Type::kReference)) { + ReferenceTypeInfo info = instruction->IsLoadClass() + ? 
instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - if (instruction->GetType() == DataType::Type::kReference) { - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; - } + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass() || - instruction->IsInstanceOf() || - instruction->IsCheckCast()) { + } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 71c394ec1f..f05159b735 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -17,11 +17,11 @@ #include "gvn.h" #include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -#include "base/bit_vector-inl.h" +#include "base/utils.h" #include "side_effects_analysis.h" -#include "utils.h" namespace art { diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec11240..0a310ca940 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. + return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. 
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -365,11 +351,11 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { HInductionVarAnalysis::InductionInfo *trip = induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); + IsConstant(trip->op_a, kExact, trip_count); return true; } return false; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 452be6feae..4fc7262265 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -392,9 +392,33 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } -static bool AlwaysThrows(ArtMethod* method) { - CodeItemDataAccessor accessor(method); +static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!method->GetDeclaringClass()->IsVerified()) { + if (Runtime::Current()->UseJitCompilation()) { + // We're at runtime, we know this is cold code if the class + // is not verified, so don't bother analyzing. + return true; + } + uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); + if (!compiler_driver->IsMethodVerifiedWithoutFailures( + method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { + // Method has soft or hard failures, don't analyze. + return true; + } + } + return false; +} + +static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(method != nullptr); + // Skip non-compilable and unverified methods. + if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) { + return false; + } // Skip native methods, methods with try blocks, and methods that are too large. + CodeItemDataAccessor accessor(method->DexInstructionData()); if (!accessor.HasCodeItem() || accessor.TriesSize() != 0 || accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) { @@ -477,7 +501,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); } } - } else if (!cha_devirtualize && AlwaysThrows(actual_method)) { + } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) { // Set always throws property for non-inlined method call with single target // (unless it was obtained through CHA, because that would imply we have // to add the CHA dependency, which seems not worth it). 
@@ -1418,7 +1442,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); - CodeItemDataAccessor accessor(method); + CodeItemDataAccessor accessor(method->DexInstructionData()); if (!accessor.HasCodeItem()) { LOG_FAIL_NO_STAT() @@ -1447,18 +1471,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) << "Method " << method->PrettyMethod() << " has soft failures un-handled by the compiler, so it cannot be inlined"; + return false; } - if (!method->GetDeclaringClass()->IsVerified()) { - uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); - if (Runtime::Current()->UseJitCompilation() || - !compiler_driver_->IsMethodVerifiedWithoutFailures( - method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) - << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; - return false; - } + if (IsMethodUnverified(compiler_driver_, method)) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; + return false; } if (invoke_instruction->IsInvokeStaticOrDirect() && @@ -1697,7 +1717,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); - CodeItemDebugInfoAccessor code_item_accessor(resolved_method); + CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), caller_compilation_unit_.GetDexCache(), diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index e81d97b0a8..02465d37ba 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -18,7 +18,7 @@ #define ART_COMPILER_OPTIMIZING_INLINER_H_ #include "dex/dex_file_types.h" -#include "invoke_type.h" +#include "dex/invoke_type.h" #include "jit/profile_compilation_info.h" #include "optimization.h" diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 0205c6a4d3..a38e2717cf 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -960,14 +960,18 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, dchecked_integral_cast<uint64_t>(string_init_entry_point) }; - MethodReference target_method(dex_file_, method_idx); + ScopedObjectAccess soa(Thread::Current()); + MethodReference target_method(resolved_method->GetDexFile(), + resolved_method->GetDexMethodIndex()); + // We pass null for the resolved_method to ensure optimizations + // don't rely on it. 
HInvoke* invoke = new (allocator_) HInvokeStaticOrDirect( allocator_, number_of_arguments - 1, DataType::Type::kReference /*return_type */, dex_pc, method_idx, - nullptr, + nullptr /* resolved_method */, dispatch_info, invoke_type, target_method, @@ -1811,6 +1815,29 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } +static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (cls == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (cls->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (cls->IsArrayClass()) { + if (cls->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (cls->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (cls->IsFinal()) { + return TypeCheckKind::kExactCheck; + } else if (cls->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); @@ -1825,8 +1852,22 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::Class> klass = ResolveClass(soa, type_index); - bool needs_access_check = LoadClassNeedsAccessCheck(klass); + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); + Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( + soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); + + bool needs_access_check = true; + if (klass != nullptr) { + if (klass->IsPublic()) { + needs_access_check = false; + } else { + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { + needs_access_check = false; + } + } + } + return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1871,83 +1912,25 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } -Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, - dex::TypeIndex type_index) { - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_); - // TODO: Avoid creating excessive handles if the method references the same class repeatedly. - // (Use a map on the local_allocator_.) 
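ComputeTypeCheckKind above is a straight decision tree over the resolved class: an unresolved class falls back to the slow-path check, interfaces and arrays get dedicated kinds, a final class permits an exact compare, and the remaining cases degrade to an abstract-class or class-hierarchy walk. A rough standalone C++ analogue using a stand-in struct instead of mirror::Class (illustrative only; the struct and its field names are invented):

#include <cassert>

enum class Kind { kExact, kAbstract, kClassHierarchy, kInterface, kArrayObject, kArray, kUnresolved };

struct ClassDesc {  // Stand-in for the mirror::Class predicates used above.
  bool resolved = true;
  bool is_interface = false;
  bool is_array = false;
  bool component_is_object = false;
  bool component_assignable_only_from_itself = false;  // ~ CannotBeAssignedFromOtherTypes().
  bool is_final = false;
  bool is_abstract = false;
};

Kind Classify(const ClassDesc& c) {
  if (!c.resolved) return Kind::kUnresolved;
  if (c.is_interface) return Kind::kInterface;
  if (c.is_array) {
    if (c.component_is_object) return Kind::kArrayObject;
    if (c.component_assignable_only_from_itself) return Kind::kExact;
    return Kind::kArray;
  }
  if (c.is_final) return Kind::kExact;
  if (c.is_abstract) return Kind::kAbstract;
  return Kind::kClassHierarchy;
}

int main() {
  ClassDesc string_class;    string_class.is_final = true;      // e.g. java.lang.String.
  ClassDesc list_interface;  list_interface.is_interface = true; // e.g. java.util.List.
  ClassDesc number_class;    number_class.is_abstract = true;    // e.g. java.lang.Number.
  ClassDesc object_array;    object_array.is_array = true;
  object_array.component_is_object = true;                       // Object[].
  ClassDesc plain_class;                                         // e.g. java.util.ArrayList.
  assert(Classify(string_class) == Kind::kExact);
  assert(Classify(list_interface) == Kind::kInterface);
  assert(Classify(number_class) == Kind::kAbstract);
  assert(Classify(object_array) == Kind::kArrayObject);
  assert(Classify(plain_class) == Kind::kClassHierarchy);
  return 0;
}
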
- return handles_->NewHandle(klass); -} - -bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { - if (klass == nullptr) { - return true; - } else if (klass->IsPublic()) { - return false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); - } -} - void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); + HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::Class> klass = ResolveClass(soa, type_index); - bool needs_access_check = LoadClassNeedsAccessCheck(klass); - TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( - klass.Get(), code_generator_, compiler_driver_, needs_access_check); - - HInstruction* class_or_null = nullptr; - HIntConstant* bitstring_path_to_root = nullptr; - HIntConstant* bitstring_mask = nullptr; - if (check_kind == TypeCheckKind::kBitstringCheck) { - // TODO: Allow using the bitstring check also if we need an access check. - DCHECK(!needs_access_check); - class_or_null = graph_->GetNullConstant(dex_pc); - MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); - uint32_t path_to_root = - SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); - uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); - bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); - bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); - } else { - class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); - } - DCHECK(class_or_null != nullptr); - + TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, - class_or_null, - check_kind, - klass, - dex_pc, - allocator_, - bitstring_path_to_root, - bitstring_mask)); + AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. 
- AppendInstruction( - new (allocator_) HCheckCast(object, - class_or_null, - check_kind, - klass, - dex_pc, - allocator_, - bitstring_path_to_root, - bitstring_mask)); + AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index f78829232d..4428c53277 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -39,7 +39,6 @@ class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; class OptimizingCompilerStats; -class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; @@ -233,12 +232,6 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); - Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) - REQUIRES_SHARED(Locks::mutator_lock_); - - bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) - REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the outer-most compiling method's class. ObjPtr<mirror::Class> GetOutermostCompilingClass() const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2538fa38f1..34837700a2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -120,6 +120,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; CompilerDriver* compiler_driver_; @@ -576,9 +579,7 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. -static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, - HInstruction* object, - /*out*/bool* outcome) { +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -588,6 +589,7 @@ static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, return false; } + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -612,8 +614,8 @@ static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && - check_cast->GetTargetClass()->NeedsAccessCheck()) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (load_class->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. 
return; } @@ -631,18 +633,15 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . bool outcome = false; - if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { + if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { - HLoadClass* load_class = check_cast->GetTargetClass(); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); - } + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -653,8 +652,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && - instruction->GetTargetClass()->NeedsAccessCheck()) { + HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); + if (load_class->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -677,7 +676,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { // Note: The `outcome` is initialized to please valgrind - the compiler can reorder // the return value check with the `outcome` check, b/27651442 . bool outcome = false; - if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { + if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -690,14 +689,11 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { - HLoadClass* load_class = instruction->GetTargetClass(); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); - } + if (outcome && !load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. 
+ load_class->GetBlock()->RemoveInstruction(load_class); } } } @@ -857,34 +853,10 @@ static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, x->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; } // Returns true if operands a and b consists of widening type conversions @@ -2437,6 +2409,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2520,6 +2513,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case 
Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 6928b70df7..f8dc316e45 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -18,15 +18,15 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/utils.h" #include "class_linker.h" +#include "dex/invoke_type.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" -#include "utils.h" namespace art { diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7..1035cbc2c4 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -266,6 +266,18 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 2f8e33f941..81c0b50932 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, 
bool is64bit, MacroAssembler* masm) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? 
DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 830d0403e4..e61a0b0809 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -432,341 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - 
vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
- __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index cafa5228d9..bc1292b2b7 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); } +inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, DataType::Type type, bool isR6, + bool hasMsa, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. 
- if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } else { - __ MulR2(out, out, TMP); + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + __ Mtc1(in_lo, FTMP); + __ Mthc1(in_hi, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } - __ Srl(out, out, 24); } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + if (isR6) { + __ MulR6(out, out, TMP); + } else { + __ MulR2(out, out, TMP); + } + __ Srl(out, out, 24); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); + Register out_hi = locations->GetTemp(1).AsRegister<Register>(); + Register tmp_lo = TMP; + Register out_lo = out; - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); + __ Srl(tmp_lo, in_lo, 1); + __ Srl(tmp_hi, in_hi, 1); - __ LoadConst32(AT, 0x55555555); + __ LoadConst32(AT, 0x55555555); - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); + __ And(tmp_lo, tmp_lo, AT); + __ Subu(tmp_lo, in_lo, tmp_lo); - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); + __ And(tmp_hi, tmp_hi, AT); + __ Subu(tmp_hi, in_hi, tmp_hi); - __ LoadConst32(AT, 0x33333333); + __ LoadConst32(AT, 0x33333333); - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); + __ And(out_lo, tmp_lo, AT); + __ Srl(tmp_lo, tmp_lo, 2); + __ And(tmp_lo, tmp_lo, AT); + __ Addu(tmp_lo, out_lo, tmp_lo); - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); + __ And(out_hi, tmp_hi, AT); + __ Srl(tmp_hi, tmp_hi, 2); + __ And(tmp_hi, tmp_hi, AT); + __ Addu(tmp_hi, out_hi, tmp_hi); - // Here we deviate from the original algorithm a bit. 
We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. - __ LoadConst32(AT, 0x0F0F0F0F); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. + __ LoadConst32(AT, 0x0F0F0F0F); - __ Addu(TMP, tmp_hi, tmp_lo); + __ Addu(TMP, tmp_hi, tmp_lo); - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x01010101); + __ LoadConst32(AT, 0x01010101); - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } + if (isR6) { + __ MulR6(out, out, AT); + } else { + __ MulR2(out, out, AT); + } - __ Srl(out, out, 24); + __ Srl(out, out, 24); + } } } @@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(int) @@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. 
- // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = 
locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 13397f11d4..1c1ba40132 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { bool IsR2OrNewer() const; bool IsR6() const; bool Is32BitFPU() const; + bool HasMsa() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 89f1818be2..f429afde2c 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetAllocator(); } +inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, const DataType::Type type, + const bool hasMsa, Mips64Assembler* assembler) { GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); @@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations, // bits are set but the algorithm here attempts to minimize the total // number of instructions executed even when a large number of bits 
// are set. - - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else if (type == DataType::Type::kInt64) { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); + } else { + __ Dmtc1(in, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Dmfc1(out, FTMP); + } + } else { + if (type == DataType::Type::kInt32) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } } } @@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(long) @@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, 
GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 6f40d90ddb..748b0b02b2 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool HasMsa() const; + private: Mips64Assembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 46b7f3f1ce..c4f322bf0c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* 
codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
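// Sketch (plain C++, not ART code) of the trick behind the andps/andpd
// sequences being removed here: floating-point abs() just clears the sign
// bit, so masking with 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double) is
// enough, whether the mask comes from the constant area or is built on the stack.
#include <cstdint>
#include <cstring>

float AbsViaSignMask(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));   // bit-cast float -> uint32_t
  bits &= UINT32_C(0x7FFFFFFF);           // clear the sign bit
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}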
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. -} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
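// Sketch (plain C++, not ART code) of the branch-free abs() idiom encoded by
// the deleted cdq/xorl/subl and movl/sarl/xorl/subl/sbbl sequences: s is 0 for
// non-negative inputs and -1 for negative ones, so (x ^ s) - s either leaves x
// unchanged or negates it, with no branch.
#include <cstdint>

int32_t AbsNoBranch(int32_t x) {
  int32_t s = x >> 31;     // arithmetic shift: all zeros or all ones (cdq/sarl)
  return (x ^ s) - s;      // identity when s == 0, two's-complement negate when s == -1
}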
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
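// Sketch (plain C++, not ART code) of why the deleted GenMinMaxFP ORs the
// operands for min and ANDs them for max when the ucomiss/ucomisd compare
// reports equality: +0.0 and -0.0 compare equal, yet Java's min must return
// -0.0 and its max +0.0. OR-ing the bit patterns keeps a set sign bit,
// AND-ing clears it. NaNs never reach this step, the parity branch diverts
// them to the NaN literal first.
#include <cstdint>
#include <cstring>

float MinOfEqualValues(float a, float b) {   // precondition: a == b, neither is NaN
  uint32_t ba, bb, r;
  std::memcpy(&ba, &a, sizeof(ba));
  std::memcpy(&bb, &b, sizeof(bb));
  r = ba | bb;                               // orps: -0.0 wins for min
  float out;
  std::memcpy(&out, &r, sizeof(out));
  return out;
}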
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6483b7cb2a..437bc3dd3c 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
-} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. - __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? 
op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. 
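// Sketch (plain C++, not ART code) of the cmp + cmov pattern this deleted
// integer GenMinMax emits: op1 is preloaded into the output register, one
// compare sets the flags, and a conditional move overwrites the output with
// op2 only when op2 is the winner. The if below is the semantic equivalent of
// the cmovg; the generated code has no branch.
#include <cstdint>

int64_t MinLikeCmov(int64_t out /* already holds op1 */, int64_t op2) {
  if (out > op2) {    // cmpq out, op2 ; cmovg out, op2
    out = op2;
  }
  return out;
}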
- DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index aae94b227c..237ecd3c10 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -834,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor { // Singleton references cannot be seen by the callee. } else { if (side_effects.DoesAnyRead()) { + // Invocation may read the heap value. KeepIfIsStore(heap_values[i]); } if (side_effects.DoesAnyWrite()) { + // Keep the store since it's not used to track the heap value anymore. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -879,6 +882,7 @@ class LSEVisitor : public HGraphDelegateVisitor { } if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) { DCHECK(!new_instance->IsFinalizable()); + // new_instance can potentially be eliminated. singleton_new_instances_.push_back(new_instance); } ScopedArenaVector<HInstruction*>& heap_values = @@ -901,7 +905,13 @@ class LSEVisitor : public HGraphDelegateVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_instances_.push_back(new_array); + if (new_array->GetLength()->IsIntConstant() && + new_array->GetLength()->AsIntConstant()->GetValue() >= 0) { + // new_array can potentially be eliminated. 
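// Sketch of the new length guard, assuming the declarations from nodes.h in
// this same directory: a singleton new-array is only treated as removable when
// its length is a known non-negative constant, because "new T[n]" with n < 0
// must still throw NegativeArraySizeException even when the array is unused.
#include "nodes.h"

namespace art {
bool NewArrayLengthAllowsElimination(HNewArray* new_array) {
  HInstruction* length = new_array->GetLength();
  return length->IsIntConstant() && length->AsIntConstant()->GetValue() >= 0;
}
}  // namespace art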
+ singleton_new_instances_.push_back(new_array); + } else { + // new_array may throw NegativeArraySizeException. Keep it. + } } ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 3dc1ef7534..d3b081e005 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -30,46 +30,6 @@ namespace art { -// TODO: Clean up the packed type detection so that we have the right type straight away -// and do not need to go through this normalization. -static inline void NormalizePackedType(/* inout */ DataType::Type* type, - /* inout */ bool* is_unsigned) { - switch (*type) { - case DataType::Type::kBool: - DCHECK(!*is_unsigned); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - if (*is_unsigned) { - *is_unsigned = false; - *type = DataType::Type::kUint8; - } else { - *type = DataType::Type::kInt8; - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - if (*is_unsigned) { - *is_unsigned = false; - *type = DataType::Type::kUint16; - } else { - *type = DataType::Type::kInt16; - } - break; - case DataType::Type::kInt32: - case DataType::Type::kInt64: - // We do not have kUint32 and kUint64 at the moment. - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK(!*is_unsigned); - break; - default: - LOG(FATAL) << "Unexpected type " << *type; - UNREACHABLE(); - } -} - // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; @@ -374,29 +334,14 @@ static bool IsAddConst(HInstruction* instruction, // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) // x = min(x_phi, ..) +// x = max(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { - if (reduction->IsAdd()) { + if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -1337,78 +1282,59 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. 
- HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - NormalizePackedType(&type, &is_unsigned); - GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; + return true; + } + } else if (instruction->IsMin() || instruction->IsMax()) { + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. 
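// The kind of scalar loop this path now vectorizes through HMin/HVecMin
// (sketch, plain C++): both operands are zero-extended narrow values, so
// IsNarrowerOperands reports is_unsigned and ToProperType presumably selects
// the unsigned packed element type for the vector min.
#include <algorithm>
#include <cstddef>
#include <cstdint>

void BytewiseMin(const uint8_t* a, const uint8_t* b, uint8_t* out, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    out[i] = std::min(a[i], b[i]);   // candidate for a packed unsigned-byte min
  }
}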
+ DCHECK(r != nullptr); + DCHECK(s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } - default: - return false; - } // switch + return true; + } } return false; } @@ -1849,75 +1775,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - NormalizePackedType(&type, &is_unsigned); - vector = new (global_allocator_) - HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - NormalizePackedType(&type, &is_unsigned); - vector = new (global_allocator_) - HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. - DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? 
vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kMin: + GENERATE_VEC( + new (global_allocator_) HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMin(org_type, opa, opb, dex_pc)); + case HInstruction::kMax: + GENERATE_VEC( + new (global_allocator_) HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMax(org_type, opa, opb, dex_pc)); + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1987,15 +1867,13 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { if (vector_mode_ == kVector) { - NormalizePackedType(&type, &is_unsigned); vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd( global_allocator_, vector_map_->Get(r), vector_map_->Get(s), - type, + HVecOperation::ToProperType(type, is_unsigned), vector_length_, is_rounded, - is_unsigned, kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { @@ -2030,9 +1908,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { + if (v->GetType() == reduction_type && v->IsAbs()) { HInstruction* x = v->InputAt(0); if (x->GetType() == reduction_type) { int64_t c = 0; @@ -2086,14 +1962,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - NormalizePackedType(&reduction_type, &is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 5587f87dae..a8ddb7cfdc 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1121,10 +1121,6 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node); } -HInstruction::InstructionKind HInstruction::GetKind() const { - return GetKindInternal(); -} - HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -2895,6 +2891,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "BootImageLinkTimePcRelative"; case 
HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "DirectAddress"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: return os << "BssEntry"; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: @@ -3105,8 +3103,6 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; - case TypeCheckKind::kBitstringCheck: - return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b0657d6f1c..d42f4a7e80 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,17 +26,18 @@ #include "base/arena_object.h" #include "base/array_ref.h" #include "base/iteration_range.h" +#include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" #include "data_type.h" #include "deoptimization_kind.h" #include "dex/dex_file.h" #include "dex/dex_file_types.h" +#include "dex/invoke_type.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" #include "intrinsics_enum.h" -#include "invoke_type.h" #include "locations.h" #include "method_reference.h" #include "mirror/class.h" @@ -1337,6 +1338,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1382,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadException, Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1518,7 +1522,6 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) private: \ H##type& operator=(const H##type&) = delete; \ public: \ - InstructionKind GetKindInternal() const OVERRIDE { return k##type; } \ const char* DebugName() const OVERRIDE { return #type; } \ bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \ return other->Is##type(); \ @@ -1952,7 +1955,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: - HInstruction(SideEffects side_effects, uint32_t dex_pc) +#define DECLARE_KIND(type, super) k##type, + enum InstructionKind { + FOR_EACH_INSTRUCTION(DECLARE_KIND) + kLastInstructionKind + }; +#undef DECLARE_KIND + + HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -1966,16 +1976,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { lifetime_position_(kNoLifetime), side_effects_(side_effects), reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) { + SetPackedField<InstructionKindField>(kind); SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact()); } virtual ~HInstruction() {} -#define DECLARE_KIND(type, super) k##type, - enum InstructionKind { - FOR_EACH_INSTRUCTION(DECLARE_KIND) - }; -#undef DECLARE_KIND HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { 
return previous_; } @@ -2279,8 +2285,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide // the virtual function because the __attribute__((__pure__)) doesn't really // apply the strong requirement for virtual functions, preventing optimizations. - InstructionKind GetKind() const PURE; - virtual InstructionKind GetKindInternal() const = 0; + InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); } virtual size_t ComputeHashCode() const { size_t result = GetKind(); @@ -2332,9 +2337,16 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // its users. Used by liveness analysis to compute use positions accordingly. static constexpr size_t kFlagEmittedAtUseSite = 0u; static constexpr size_t kFlagReferenceTypeIsExact = kFlagEmittedAtUseSite + 1; - static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKindSize = + MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1)); + static constexpr size_t kNumberOfGenericPackedBits = + kFieldInstructionKind + kFieldInstructionKindSize; static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; + static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits, + "Too many generic packed fields"); + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const { return GetInputRecords()[i]; } @@ -2391,9 +2403,13 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { live_interval_(nullptr), lifetime_position_(kNoLifetime), side_effects_(other.side_effects_), - reference_type_handle_(other.reference_type_handle_) {} + reference_type_handle_(other.reference_type_handle_) { + } private: + using InstructionKindField = + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); for (auto use_node = uses_.begin(); use_node != fixup_end; ++use_node) { @@ -2569,12 +2585,13 @@ class HVariableInputSizeInstruction : public HInstruction { void RemoveAllInputs(); protected: - HVariableInputSizeInstruction(SideEffects side_effects, + HVariableInputSizeInstruction(InstructionKind inst_kind, + SideEffects side_effects, uint32_t dex_pc, ArenaAllocator* allocator, size_t number_of_inputs, ArenaAllocKind kind) - : HInstruction(side_effects, dex_pc), + : HInstruction(inst_kind, side_effects, dex_pc), inputs_(number_of_inputs, allocator->Adapter(kind)) {} DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction); @@ -2585,8 +2602,8 @@ class HVariableInputSizeInstruction : public HInstruction { template<size_t N> class HTemplateInstruction: public HInstruction { public: - HTemplateInstruction<N>(SideEffects side_effects, uint32_t dex_pc) - : HInstruction(side_effects, dex_pc), inputs_() {} + HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc), inputs_() {} virtual ~HTemplateInstruction() {} using HInstruction::GetInputRecords; // Keep the const version visible. 
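// Sketch (illustrative names, not ART code) of the packed-field pattern these
// nodes.h changes introduce: every concrete instruction passes its kind tag up
// the constructor chain, the base class stores it in a few bits of the shared
// 32-bit packed word sized by MinimumBitsToStore, and GetKind() becomes a
// plain non-virtual bit-field read instead of a virtual GetKindInternal() call.
#include <cstdint>

enum class Kind : uint32_t { kAdd, kSub, kMin, kMax, kAbs, kLast };

class Node {
 public:
  explicit Node(Kind kind)
      : packed_(static_cast<uint32_t>(kind) << kKindShift) {}   // kind set once
  Kind GetKind() const {                                        // non-virtual
    return static_cast<Kind>((packed_ >> kKindShift) & kKindMask);
  }
 private:
  static constexpr uint32_t kKindShift = 2u;   // low bits left for boolean flags
  static constexpr uint32_t kKindMask = 0x7u;  // 3 bits cover kLast - 1 == 4
  uint32_t packed_;
};

class MinNode : public Node {
 public:
  MinNode() : Node(Kind::kMin) {}              // mirrors HMin passing kMin to HBinaryOperation
};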
@@ -2607,8 +2624,8 @@ class HTemplateInstruction: public HInstruction { template<> class HTemplateInstruction<0>: public HInstruction { public: - explicit HTemplateInstruction<0>(SideEffects side_effects, uint32_t dex_pc) - : HInstruction(side_effects, dex_pc) {} + explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc) {} virtual ~HTemplateInstruction() {} @@ -2627,8 +2644,12 @@ class HTemplateInstruction<0>: public HInstruction { template<intptr_t N> class HExpression : public HTemplateInstruction<N> { public: - HExpression<N>(DataType::Type type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction<N>(side_effects, dex_pc) { + using HInstruction::InstructionKind; + HExpression<N>(InstructionKind kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc) + : HTemplateInstruction<N>(kind, side_effects, dex_pc) { this->template SetPackedField<TypeField>(type); } virtual ~HExpression() {} @@ -2653,7 +2674,8 @@ class HExpression : public HTemplateInstruction<N> { class HReturnVoid FINAL : public HTemplateInstruction<0> { public: explicit HReturnVoid(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) {} + : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } @@ -2668,7 +2690,7 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> { class HReturn FINAL : public HTemplateInstruction<1> { public: explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } @@ -2688,6 +2710,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { DataType::Type type, uint32_t dex_pc = kNoDexPc) : HVariableInputSizeInstruction( + kPhi, SideEffects::None(), dex_pc, allocator, @@ -2788,7 +2811,9 @@ class HPhi FINAL : public HVariableInputSizeInstruction { // exit block. class HExit FINAL : public HTemplateInstruction<0> { public: - explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HExit(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } @@ -2801,7 +2826,9 @@ class HExit FINAL : public HTemplateInstruction<0> { // Jumps from one block to another. 
class HGoto FINAL : public HTemplateInstruction<0> { public: - explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HGoto(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) { + } bool IsClonable() const OVERRIDE { return true; } bool IsControlFlow() const OVERRIDE { return true; } @@ -2818,8 +2845,9 @@ class HGoto FINAL : public HTemplateInstruction<0> { class HConstant : public HExpression<0> { public: - explicit HConstant(DataType::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + explicit HConstant(InstructionKind kind, DataType::Type type, uint32_t dex_pc = kNoDexPc) + : HExpression(kind, type, SideEffects::None(), dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -2860,7 +2888,8 @@ class HNullConstant FINAL : public HConstant { private: explicit HNullConstant(uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kReference, dex_pc) {} + : HConstant(kNullConstant, DataType::Type::kReference, dex_pc) { + } friend class HGraph; }; @@ -2899,9 +2928,12 @@ class HIntConstant FINAL : public HConstant { private: explicit HIntConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt32, dex_pc), value_(value) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), value_(value) { + } explicit HIntConstant(bool value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt32, dex_pc), value_(value ? 1 : 0) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), + value_(value ? 1 : 0) { + } const int32_t value_; @@ -2935,7 +2967,9 @@ class HLongConstant FINAL : public HConstant { private: explicit HLongConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt64, dex_pc), value_(value) {} + : HConstant(kLongConstant, DataType::Type::kInt64, dex_pc), + value_(value) { + } const int64_t value_; @@ -2986,9 +3020,13 @@ class HFloatConstant FINAL : public HConstant { private: explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat32, dex_pc), value_(value) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(value) { + } explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat32, dex_pc), value_(bit_cast<float, int32_t>(value)) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(bit_cast<float, int32_t>(value)) { + } const float value_; @@ -3039,9 +3077,13 @@ class HDoubleConstant FINAL : public HConstant { private: explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat64, dex_pc), value_(value) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(value) { + } explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat64, dex_pc), value_(bit_cast<double, int64_t>(value)) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(bit_cast<double, int64_t>(value)) { + } const double value_; @@ -3055,7 +3097,7 @@ class HDoubleConstant FINAL : public HConstant { class HIf FINAL : public HTemplateInstruction<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } @@ -3091,7 +3133,7 @@ class HTryBoundary FINAL : public 
HTemplateInstruction<0> { }; explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) { SetPackedField<BoundaryKindField>(kind); } @@ -3150,6 +3192,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::All(), dex_pc, allocator, @@ -3173,6 +3216,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::CanTriggerGC(), dex_pc, allocator, @@ -3241,7 +3285,12 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { // CHA guards are only optimized in a separate pass and it has no side effects // with regard to other passes. HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc) - : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, allocator, 0, kArenaAllocCHA) { + : HVariableInputSizeInstruction(kShouldDeoptimizeFlag, + SideEffects::None(), + dex_pc, + allocator, + 0, + kArenaAllocCHA) { } DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; } @@ -3264,7 +3313,8 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { class HCurrentMethod FINAL : public HExpression<0> { public: explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + : HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) { + } DECLARE_INSTRUCTION(CurrentMethod); @@ -3286,7 +3336,7 @@ class HClassTableGet FINAL : public HExpression<1> { TableKind kind, size_t index, uint32_t dex_pc) - : HExpression(type, SideEffects::None(), dex_pc), + : HExpression(kClassTableGet, type, SideEffects::None(), dex_pc), index_(index) { SetPackedField<TableKindField>(kind); SetRawInputAt(0, cls); @@ -3329,7 +3379,7 @@ class HPackedSwitch FINAL : public HTemplateInstruction<1> { uint32_t num_entries, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -3359,8 +3409,11 @@ class HPackedSwitch FINAL : public HTemplateInstruction<1> { class HUnaryOperation : public HExpression<1> { public: - HUnaryOperation(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + HUnaryOperation(InstructionKind kind, + DataType::Type result_type, + HInstruction* input, + uint32_t dex_pc = kNoDexPc) + : HExpression(kind, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } @@ -3394,12 +3447,13 @@ class HUnaryOperation : public HExpression<1> { class HBinaryOperation : public HExpression<2> { public: - HBinaryOperation(DataType::Type result_type, + HBinaryOperation(InstructionKind kind, + DataType::Type result_type, HInstruction* left, HInstruction* right, SideEffects side_effects = SideEffects::None(), uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, side_effects, dex_pc) { + : HExpression(kind, result_type, side_effects, dex_pc) { SetRawInputAt(0, left); SetRawInputAt(1, right); } @@ -3498,8 +3552,16 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs); class HCondition : public HBinaryOperation { public: - 
HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(DataType::Type::kBool, first, second, SideEffects::None(), dex_pc) { + HCondition(InstructionKind kind, + HInstruction* first, + HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kind, + DataType::Type::kBool, + first, + second, + SideEffects::None(), + dex_pc) { SetPackedField<ComparisonBiasField>(ComparisonBias::kNoBias); } @@ -3579,7 +3641,8 @@ class HCondition : public HBinaryOperation { class HEqual FINAL : public HCondition { public: HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3623,8 +3686,10 @@ class HEqual FINAL : public HCondition { class HNotEqual FINAL : public HCondition { public: - HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HNotEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kNotEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3667,8 +3732,10 @@ class HNotEqual FINAL : public HCondition { class HLessThan FINAL : public HCondition { public: - HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThan(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3705,8 +3772,10 @@ class HLessThan FINAL : public HCondition { class HLessThanOrEqual FINAL : public HCondition { public: - HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThanOrEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3744,7 +3813,8 @@ class HLessThanOrEqual FINAL : public HCondition { class HGreaterThan FINAL : public HCondition { public: HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3782,7 +3852,8 @@ class HGreaterThan FINAL : public HCondition { class HGreaterThanOrEqual FINAL : public HCondition { public: HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3820,7 +3891,8 @@ class HGreaterThanOrEqual FINAL : public HCondition { class HBelow FINAL : public HCondition { public: HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelow, first, second, 
dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3861,7 +3933,8 @@ class HBelow FINAL : public HCondition { class HBelowOrEqual FINAL : public HCondition { public: HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelowOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3902,7 +3975,8 @@ class HBelowOrEqual FINAL : public HCondition { class HAbove FINAL : public HCondition { public: HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAbove, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3943,7 +4017,8 @@ class HAbove FINAL : public HCondition { class HAboveOrEqual FINAL : public HCondition { public: HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAboveOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3993,7 +4068,8 @@ class HCompare FINAL : public HBinaryOperation { HInstruction* second, ComparisonBias bias, uint32_t dex_pc) - : HBinaryOperation(DataType::Type::kInt32, + : HBinaryOperation(kCompare, + DataType::Type::kInt32, first, second, SideEffectsForArchRuntimeCalls(comparison_type), @@ -4079,7 +4155,10 @@ class HNewInstance FINAL : public HExpression<1> { const DexFile& dex_file, bool finalizable, QuickEntrypointEnum entrypoint) - : HExpression(DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc), + : HExpression(kNewInstance, + DataType::Type::kReference, + SideEffects::CanTriggerGC(), + dex_pc), type_index_(type_index), dex_file_(dex_file), entrypoint_(entrypoint) { @@ -4235,7 +4314,8 @@ class HInvoke : public HVariableInputSizeInstruction { using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; - HInvoke(ArenaAllocator* allocator, + HInvoke(InstructionKind kind, + ArenaAllocator* allocator, uint32_t number_of_arguments, uint32_t number_of_other_inputs, DataType::Type return_type, @@ -4244,6 +4324,7 @@ class HInvoke : public HVariableInputSizeInstruction { ArtMethod* resolved_method, InvokeType invoke_type) : HVariableInputSizeInstruction( + kind, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
dex_pc, allocator, @@ -4278,7 +4359,8 @@ class HInvokeUnresolved FINAL : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(allocator, + : HInvoke(kInvokeUnresolved, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, @@ -4303,14 +4385,16 @@ class HInvokePolymorphic FINAL : public HInvoke { DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index) - : HInvoke(allocator, + : HInvoke(kInvokePolymorphic, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, dex_pc, dex_method_index, nullptr, - kVirtual) {} + kVirtual) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4347,6 +4431,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for app->boot calls with relocatable image. + kBootImageRelRo, + // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. kBssEntry, @@ -4387,7 +4475,8 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { InvokeType invoke_type, MethodReference target_method, ClinitCheckRequirement clinit_check_requirement) - : HInvoke(allocator, + : HInvoke(kInvokeStaticOrDirect, + allocator, number_of_arguments, // There is potentially one extra argument for the HCurrentMethod node, and // potentially one other if the clinit check is explicit, and potentially @@ -4478,6 +4567,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo || GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { @@ -4567,7 +4657,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { kFieldClinitCheckRequirementSize>; // Cached values of the resolved method, to avoid needing the mutator lock. 
- MethodReference target_method_; + const MethodReference target_method_; DispatchInfo dispatch_info_; }; std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); @@ -4582,7 +4672,8 @@ class HInvokeVirtual FINAL : public HInvoke { uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t vtable_index) - : HInvoke(allocator, + : HInvoke(kInvokeVirtual, + allocator, number_of_arguments, 0u, return_type, @@ -4590,7 +4681,8 @@ class HInvokeVirtual FINAL : public HInvoke { dex_method_index, resolved_method, kVirtual), - vtable_index_(vtable_index) {} + vtable_index_(vtable_index) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4633,7 +4725,8 @@ class HInvokeInterface FINAL : public HInvoke { uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t imt_index) - : HInvoke(allocator, + : HInvoke(kInvokeInterface, + allocator, number_of_arguments, 0u, return_type, @@ -4641,7 +4734,8 @@ class HInvokeInterface FINAL : public HInvoke { dex_method_index, resolved_method, kInterface), - imt_index_(imt_index) {} + imt_index_(imt_index) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4670,7 +4764,7 @@ class HInvokeInterface FINAL : public HInvoke { class HNeg FINAL : public HUnaryOperation { public: HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) { + : HUnaryOperation(kNeg, result_type, input, dex_pc) { DCHECK_EQ(result_type, DataType::Kind(input->GetType())); } @@ -4698,7 +4792,7 @@ class HNeg FINAL : public HUnaryOperation { class HNewArray FINAL : public HExpression<2> { public: HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc) - : HExpression(DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cls); SetRawInputAt(1, length); } @@ -4734,7 +4828,8 @@ class HAdd FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4769,7 +4864,8 @@ class HSub FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kSub, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> static T Compute(T x, T y) { return x - y; } @@ -4802,7 +4898,8 @@ class HMul FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4837,7 +4934,8 @@ class HDiv FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kDiv, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> T ComputeIntegral(T x, T y) const { @@ -4884,7 +4982,8 @@ class HRem FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, 
SideEffects::None(), dex_pc) {} + : HBinaryOperation(kRem, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> T ComputeIntegral(T x, T y) const { @@ -4925,12 +5024,123 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; +class HMin FINAL : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax FINAL : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + +class HAbs FINAL : public HUnaryOperation { + public: + HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kAbs, result_type, input, dex_pc) {} + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x) { + return x < 0 ? -x : x; + } + + // Evaluation for floating-point values. + // Note, as a "quality of implementation", rather than pure "spec compliance", + // we require that Math.abs() clears the sign bit (but changes nothing else) + // for all floating-point numbers, including NaN (signaling NaN may become quiet though). 
+ // http://b/30758343 + template <typename T, typename S> static T ComputeFP(T x) { + S bits = bit_cast<S, T>(x); + return bit_cast<T, S>(bits & std::numeric_limits<S>::max()); + } + + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetFloatConstant( + ComputeFP<float, int32_t>(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetDoubleConstant( + ComputeFP<double, int64_t>(x->GetValue()), GetDexPc()); + } + + DECLARE_INSTRUCTION(Abs); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Abs); +}; + class HDivZeroCheck FINAL : public HExpression<1> { public: // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` // constructor. HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4957,7 +5167,7 @@ class HShl FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kShl, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5003,7 +5213,7 @@ class HShr FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kShr, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5049,7 +5259,7 @@ class HUShr FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kUShr, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5097,7 +5307,8 @@ class HAnd FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5134,7 +5345,8 @@ class HOr FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5171,7 +5383,8 @@ class HXor FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), 
dex_pc) {} + : HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5205,7 +5418,7 @@ class HXor FINAL : public HBinaryOperation { class HRor FINAL : public HBinaryOperation { public: HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance) - : HBinaryOperation(result_type, value, distance) { + : HBinaryOperation(kRor, result_type, value, distance) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5262,7 +5475,7 @@ class HParameterValue FINAL : public HExpression<0> { uint8_t index, DataType::Type parameter_type, bool is_this = false) - : HExpression(parameter_type, SideEffects::None(), kNoDexPc), + : HExpression(kParameterValue, parameter_type, SideEffects::None(), kNoDexPc), dex_file_(dex_file), type_index_(type_index), index_(index) { @@ -5301,7 +5514,8 @@ class HParameterValue FINAL : public HExpression<0> { class HNot FINAL : public HUnaryOperation { public: HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) {} + : HUnaryOperation(kNot, result_type, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5334,7 +5548,8 @@ class HNot FINAL : public HUnaryOperation { class HBooleanNot FINAL : public HUnaryOperation { public: explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(DataType::Type::kBool, input, dex_pc) {} + : HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5372,7 +5587,7 @@ class HTypeConversion FINAL : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + : HExpression(kTypeConversion, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); // Invariant: We should never generate a conversion to a Boolean value. DCHECK_NE(DataType::Type::kBool, result_type); @@ -5404,7 +5619,7 @@ class HNullCheck FINAL : public HExpression<1> { // `HNullCheck` can trigger GC, as it may call the `NullPointerException` // constructor. 
HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -5474,7 +5689,10 @@ class HInstanceFieldGet FINAL : public HExpression<1> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kInstanceFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5534,7 +5752,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kInstanceFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5586,7 +5806,8 @@ class HArrayGet FINAL : public HExpression<2> { type, SideEffects::ArrayReadOfType(type), dex_pc, - /* is_string_char_at */ false) {} + /* is_string_char_at */ false) { + } HArrayGet(HInstruction* array, HInstruction* index, @@ -5594,7 +5815,7 @@ class HArrayGet FINAL : public HExpression<2> { SideEffects side_effects, uint32_t dex_pc, bool is_string_char_at) - : HExpression(type, side_effects, dex_pc) { + : HExpression(kArrayGet, type, side_effects, dex_pc) { SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, array); SetRawInputAt(1, index); @@ -5678,7 +5899,8 @@ class HArraySet FINAL : public HTemplateInstruction<3> { expected_component_type, // Make a best guess for side effects now, may be refined during SSA building. ComputeSideEffects(GetComponentType(value->GetType(), expected_component_type)), - dex_pc) {} + dex_pc) { + } HArraySet(HInstruction* array, HInstruction* index, @@ -5686,7 +5908,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { DataType::Type expected_component_type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction(side_effects, dex_pc) { + : HTemplateInstruction(kArraySet, side_effects, dex_pc) { SetPackedField<ExpectedComponentTypeField>(expected_component_type); SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); @@ -5786,7 +6008,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { class HArrayLength FINAL : public HExpression<1> { public: HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) { SetPackedFlag<kFlagIsStringLength>(is_string_length); // Note that arrays do not change length, so the instruction does not // depend on any write. 
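Aside: the HAbs evaluator added above folds floating-point constants by clearing the IEEE-754 sign bit (bits & std::numeric_limits<S>::max()), which yields the absolute value for every input, including -0.0 and NaN, as the new comment notes. Below is a minimal standalone sketch of that trick; the AbsViaSignBit name, the memcpy-based bit_cast stand-in, and the printf driver are illustrative only and not part of this change.

    // Standalone illustration: masking with numeric_limits<S>::max() keeps every
    // bit except the sign bit, so the result is |x| for all floats, NaN included.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <limits>

    template <typename T, typename S>
    static T AbsViaSignBit(T x) {
      static_assert(sizeof(T) == sizeof(S), "value and integer types must match in size");
      S bits;
      std::memcpy(&bits, &x, sizeof(T));       // stand-in for bit_cast<S, T>(x)
      bits &= std::numeric_limits<S>::max();   // all ones except the sign bit
      T result;
      std::memcpy(&result, &bits, sizeof(T));  // stand-in for bit_cast<T, S>(bits)
      return result;
    }

    int main() {
      std::printf("%g\n", AbsViaSignBit<float, int32_t>(-3.5f));   // 3.5
      std::printf("%g\n", AbsViaSignBit<float, int32_t>(-0.0f));   // 0
      std::printf("%g\n", AbsViaSignBit<double, int64_t>(
          -std::numeric_limits<double>::quiet_NaN()));             // nan; sign bit cleared
      return 0;
    }

This mirrors the "clears the sign bit but changes nothing else" behavior the HAbs comment requires, without relying on fabs() semantics for NaN payloads.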
@@ -5829,7 +6051,7 @@ class HBoundsCheck FINAL : public HExpression<2> { HInstruction* length, uint32_t dex_pc, bool is_string_char_at = false) - : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType())); SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, index); @@ -5862,7 +6084,9 @@ class HBoundsCheck FINAL : public HExpression<2> { class HSuspendCheck FINAL : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {} + : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), + slow_path_(nullptr) { + } bool IsClonable() const OVERRIDE { return true; } @@ -5889,7 +6113,8 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { class HNativeDebugInfo : public HTemplateInstruction<0> { public: explicit HNativeDebugInfo(uint32_t dex_pc) - : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {} + : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + } bool NeedsEnvironment() const OVERRIDE { return true; @@ -5947,11 +6172,12 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass) { + klass_(klass), + loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -5961,7 +6187,6 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); - SetPackedFlag<kFlagValidLoadedClassRTI>(false); } bool IsClonable() const OVERRIDE { return true; } @@ -6010,18 +6235,13 @@ class HLoadClass FINAL : public HInstruction { } ReferenceTypeInfo GetLoadedClassRTI() { - if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { - // Note: The is_exact flag from the return value should not be used. - return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); - } else { - return ReferenceTypeInfo::CreateInvalid(); - } + return loaded_class_rti_; } - // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. - void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(klass_ != nullptr); - SetPackedFlag<kFlagValidLoadedClassRTI>(true); + void SetLoadedClassRTI(ReferenceTypeInfo rti) { + // Make sure we only set exact types (the loaded class should never be merged). 
+ DCHECK(rti.IsExact()); + loaded_class_rti_ = rti; } dex::TypeIndex GetTypeIndex() const { return type_index_; } @@ -6074,8 +6294,7 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; - static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; + static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; @@ -6103,6 +6322,8 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; + + ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6167,7 +6388,7 @@ class HLoadString FINAL : public HInstruction { dex::StringIndex string_index, const DexFile& dex_file, uint32_t dex_pc) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { @@ -6304,6 +6525,7 @@ class HClinitCheck FINAL : public HExpression<1> { public: HClinitCheck(HLoadClass* constant, uint32_t dex_pc) : HExpression( + kClinitCheck, DataType::Type::kReference, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc) { @@ -6346,7 +6568,10 @@ class HStaticFieldGet FINAL : public HExpression<1> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kStaticFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6403,7 +6628,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kStaticFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6446,7 +6673,10 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedInstanceFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetRawInputAt(0, obj); } @@ -6474,7 +6704,9 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedInstanceFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); @@ -6512,7 +6744,10 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { HUnresolvedStaticFieldGet(DataType::Type field_type, uint32_t 
field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedStaticFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { } @@ -6538,7 +6773,9 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedStaticFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); @@ -6574,7 +6811,8 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { class HLoadException FINAL : public HExpression<0> { public: explicit HLoadException(uint32_t dex_pc = kNoDexPc) - : HExpression(DataType::Type::kReference, SideEffects::None(), dex_pc) {} + : HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) { + } bool CanBeNull() const OVERRIDE { return false; } @@ -6589,7 +6827,8 @@ class HLoadException FINAL : public HExpression<0> { class HClearException FINAL : public HTemplateInstruction<0> { public: explicit HClearException(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {} + : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) { + } DECLARE_INSTRUCTION(ClearException); @@ -6600,7 +6839,7 @@ class HClearException FINAL : public HTemplateInstruction<0> { class HThrow FINAL : public HTemplateInstruction<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + : HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, exception); } @@ -6630,143 +6869,50 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. - kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an -// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) -class HTypeCheckInstruction : public HVariableInputSizeInstruction { +class HInstanceOf FINAL : public HExpression<2> { public: - HTypeCheckInstruction(HInstruction* object, - HInstruction* target_class_or_null, - TypeCheckKind check_kind, - Handle<mirror::Class> klass, - uint32_t dex_pc, - ArenaAllocator* allocator, - HIntConstant* bitstring_path_to_root, - HIntConstant* bitstring_mask, - SideEffects side_effects) - : HVariableInputSizeInstruction( - side_effects, - dex_pc, - allocator, - /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 
4u : 2u, - kArenaAllocTypeCheckInputs), - klass_(klass) { + HInstanceOf(HInstruction* object, + HLoadClass* target_class, + TypeCheckKind check_kind, + uint32_t dex_pc) + : HExpression(kInstanceOf, + DataType::Type::kBool, + SideEffectsForArchRuntimeCalls(check_kind), + dex_pc) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); - SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class_or_null); - DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); - DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); - if (check_kind == TypeCheckKind::kBitstringCheck) { - DCHECK(target_class_or_null->IsNullConstant()); - SetRawInputAt(2, bitstring_path_to_root); - SetRawInputAt(3, bitstring_mask); - } else { - DCHECK(target_class_or_null->IsLoadClass()); - } + SetRawInputAt(1, target_class); } HLoadClass* GetTargetClass() const { - DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - uint32_t GetBitstringPathToRoot() const { - DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); - HInstruction* path_to_root = InputAt(2); - DCHECK(path_to_root->IsIntConstant()); - return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); - } - - uint32_t GetBitstringMask() const { - DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); - HInstruction* mask = InputAt(3); - DCHECK(mask->IsIntConstant()); - return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); - } - bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); - return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; } + bool NeedsEnvironment() const OVERRIDE { + return CanCallRuntime(GetTypeCheckKind()); + } + + // Used only in code generation. bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - ReferenceTypeInfo GetTargetClassRTI() { - if (GetPackedFlag<kFlagValidTargetClassRTI>()) { - // Note: The is_exact flag from the return value should not be used. - return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); - } else { - return ReferenceTypeInfo::CreateInvalid(); - } - } - - // Target class RTI is marked as valid by RTP if the klass_ is admissible. 
- void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(klass_ != nullptr); - SetPackedFlag<kFlagValidTargetClassRTI>(true); - } - - Handle<mirror::Class> GetClass() const { - return klass_; - } - - protected: - DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; - static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; - - Handle<mirror::Class> klass_; -}; - -class HInstanceOf FINAL : public HTypeCheckInstruction { - public: - HInstanceOf(HInstruction* object, - HInstruction* target_class_or_null, - TypeCheckKind check_kind, - Handle<mirror::Class> klass, - uint32_t dex_pc, - ArenaAllocator* allocator, - HIntConstant* bitstring_path_to_root, - HIntConstant* bitstring_mask) - : HTypeCheckInstruction(object, - target_class_or_null, - check_kind, - klass, - dex_pc, - allocator, - bitstring_path_to_root, - bitstring_mask, - SideEffectsForArchRuntimeCalls(check_kind)) {} - - DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; } - - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); - } - static bool CanCallRuntime(TypeCheckKind check_kind) { // Mips currently does runtime calls for any other checks. 
return check_kind != TypeCheckKind::kExactCheck; @@ -6780,12 +6926,21 @@ class HInstanceOf FINAL : public HTypeCheckInstruction { protected: DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + + private: + static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKindSize = + MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); + static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; class HBoundType FINAL : public HExpression<1> { public: explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(DataType::Type::kReference, SideEffects::None(), dex_pc), + : HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc), upper_bound_(ReferenceTypeInfo::CreateInvalid()) { SetPackedFlag<kFlagUpperCanBeNull>(true); SetPackedFlag<kFlagCanBeNull>(true); @@ -6829,25 +6984,31 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTypeCheckInstruction { +class HCheckCast FINAL : public HTemplateInstruction<2> { public: HCheckCast(HInstruction* object, - HInstruction* target_class_or_null, + HLoadClass* target_class, TypeCheckKind check_kind, - Handle<mirror::Class> klass, - uint32_t dex_pc, - ArenaAllocator* allocator, - HIntConstant* bitstring_path_to_root, - HIntConstant* bitstring_mask) - : HTypeCheckInstruction(object, - target_class_or_null, - check_kind, - klass, - dex_pc, - allocator, - bitstring_path_to_root, - bitstring_mask, - SideEffects::CanTriggerGC()) {} + uint32_t dex_pc) + : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { + SetPackedField<TypeCheckKindField>(check_kind); + SetPackedFlag<kFlagMustDoNullCheck>(true); + SetRawInputAt(0, object); + SetRawInputAt(1, target_class); + } + + HLoadClass* GetTargetClass() const { + HInstruction* load_class = InputAt(1); + DCHECK(load_class->IsLoadClass()); + return load_class->AsLoadClass(); + } + + bool IsClonable() const OVERRIDE { return true; } + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } bool NeedsEnvironment() const OVERRIDE { // Instruction may throw a CheckCastError. 
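Aside: with the HTypeCheckInstruction helper removed, HInstanceOf again stores its TypeCheckKind and the must-do-null-check flag in the shared packed-fields word, sized via MinimumBitsToStore and laid out after kNumberOfExpressionPackedBits. The simplified sketch below shows the packing idiom with hand-rolled shifts and masks; PackedFields, the chosen bit positions, and the trimmed TypeCheckKind enum are illustrative stand-ins for ART's BitField machinery, not its actual implementation.

    // Simplified illustration of packing a small enum plus a flag into one 32-bit word.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    enum class TypeCheckKind : uint32_t { kExactCheck, kClassHierarchyCheck, kArrayCheck };

    class PackedFields {
     public:
      static constexpr size_t kKindShift = 0;
      static constexpr size_t kKindBits = 2;  // enough to store values 0..2
      static constexpr uint32_t kKindMask = (1u << kKindBits) - 1u;
      static constexpr size_t kFlagMustDoNullCheck = kKindShift + kKindBits;

      void SetKind(TypeCheckKind kind) {
        fields_ = (fields_ & ~(kKindMask << kKindShift)) |
                  (static_cast<uint32_t>(kind) << kKindShift);
      }
      TypeCheckKind GetKind() const {
        return static_cast<TypeCheckKind>((fields_ >> kKindShift) & kKindMask);
      }
      void SetMustDoNullCheck(bool value) {
        fields_ = value ? (fields_ | (1u << kFlagMustDoNullCheck))
                        : (fields_ & ~(1u << kFlagMustDoNullCheck));
      }
      bool MustDoNullCheck() const { return (fields_ >> kFlagMustDoNullCheck) & 1u; }

     private:
      uint32_t fields_ = 0u;
    };

    int main() {
      PackedFields f;
      f.SetKind(TypeCheckKind::kArrayCheck);
      f.SetMustDoNullCheck(true);
      assert(f.GetKind() == TypeCheckKind::kArrayCheck);
      assert(f.MustDoNullCheck());
      f.SetMustDoNullCheck(false);  // analogous to ClearMustDoNullCheck()
      assert(f.GetKind() == TypeCheckKind::kArrayCheck);  // the kind bits are untouched
      return 0;
    }

The static_assert against kMaxNumberOfPackedBits in the real nodes plays the same role as keeping kFlagMustDoNullCheck + 1 within the word here: every packed field must fit in the one shared storage word.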
@@ -6856,10 +7017,24 @@ class HCheckCast FINAL : public HTypeCheckInstruction { bool CanThrow() const OVERRIDE { return true; } + bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } + void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } + TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } + bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } + DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); + + private: + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; + static constexpr size_t kFieldTypeCheckKindSize = + MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); + static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; + static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; + static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** @@ -6892,7 +7067,9 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction( - SideEffects::AllWritesAndReads(), dex_pc) { // Assume write/read on all fields/arrays. + kMemoryBarrier, + SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. + dex_pc) { SetPackedField<BarrierKindField>(barrier_kind); } @@ -7011,7 +7188,8 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // // If in a later phase we discover that there are no writes to reference final fields, // we can refine the side effect to a smaller set of type reads (see above constraints). - : HVariableInputSizeInstruction(SideEffects::AllReads(), + : HVariableInputSizeInstruction(kConstructorFence, + SideEffects::AllReads(), dex_pc, allocator, /* number_of_inputs */ 1, @@ -7078,6 +7256,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) : HTemplateInstruction( + kMonitorOperation, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
dex_pc) { SetPackedField<OperationKindField>(kind); @@ -7119,7 +7298,7 @@ class HSelect FINAL : public HExpression<3> { HInstruction* true_value, HInstruction* false_value, uint32_t dex_pc) - : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { + : HExpression(kSelect, HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType())); // First input must be `true_value` or `false_value` to allow codegens to @@ -7232,7 +7411,7 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove FINAL : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc), moves_(allocator->Adapter(kArenaAllocMoveOperands)) { moves_.reserve(kDefaultNumberOfMoves); } @@ -7294,7 +7473,10 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { class HIntermediateAddress FINAL : public HExpression<2> { public: HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::DependsOnGC(), dex_pc) { + : HExpression(kIntermediateAddress, + DataType::Type::kInt32, + SideEffects::DependsOnGC(), + dex_pc) { DCHECK_EQ(DataType::Size(DataType::Type::kInt32), DataType::Size(DataType::Type::kReference)) << "kPrimInt and kPrimNot have different sizes."; diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index 2c0595e3d8..d0e0fef946 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -24,7 +24,11 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. 
HMipsComputeBaseMethodAddress() - : HExpression(DataType::Type::kInt32, SideEffects::None(), kNoDexPc) {} + : HExpression(kMipsComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -42,7 +46,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HMipsComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -90,7 +94,10 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { public: HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kIntermediateArrayAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { SetRawInputAt(0, index); SetRawInputAt(1, shift); } diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index e837f1e7e0..29358e1141 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -32,7 +32,8 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { HInstruction* mul_left, HInstruction* mul_right, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + : HExpression(kMultiplyAccumulate, type, SideEffects::None(), dex_pc), + op_kind_(op) { SetRawInputAt(kInputAccumulatorIndex, accumulator); SetRawInputAt(kInputMulLeftIndex, mul_left); SetRawInputAt(kInputMulRightIndex, mul_right); @@ -68,7 +69,12 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc), + : HBinaryOperation(kBitwiseNegatedRight, + result_type, + left, + right, + SideEffects::None(), + dex_pc), op_kind_(op) { DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op; } @@ -143,7 +149,10 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { public: HIntermediateAddressIndex( HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kIntermediateAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { SetRawInputAt(0, index); SetRawInputAt(1, offset); SetRawInputAt(2, shift); @@ -193,7 +202,7 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { // is an extension. int shift = 0, uint32_t dex_pc = kNoDexPc) - : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + : HExpression(kDataProcWithShifterOp, instr->GetType(), SideEffects::None(), dex_pc), instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift & (instr->GetType() == DataType::Type::kInt32 ? kMaxIntShiftDistance diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 87dff8403b..0d38d57375 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -71,13 +71,15 @@ class HVecOperation : public HVariableInputSizeInstruction { // TODO: we could introduce SIMD types in HIR. 
static constexpr DataType::Type kSIMDType = DataType::Type::kFloat64; - HVecOperation(ArenaAllocator* allocator, + HVecOperation(InstructionKind kind, + ArenaAllocator* allocator, DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVariableInputSizeInstruction(side_effects, + : HVariableInputSizeInstruction(kind, + side_effects, dex_pc, allocator, number_of_inputs, @@ -131,8 +133,6 @@ class HVecOperation : public HVariableInputSizeInstruction { } // Maps an integral type to the same-size signed type and leaves other types alone. - // Can be used to test relaxed type consistency in which packed same-size integral - // types can co-exist, but other type mixes are an error. static DataType::Type ToSignedType(DataType::Type type) { switch (type) { case DataType::Type::kBool: // 1-byte storage unit @@ -160,6 +160,11 @@ class HVecOperation : public HVariableInputSizeInstruction { } } + // Maps an integral type to the same-size (un)signed type. Leaves other types alone. + static DataType::Type ToProperType(DataType::Type type, bool is_unsigned) { + return is_unsigned ? ToUnsignedType(type) : ToSignedType(type); + } + // Helper method to determine if an instruction returns a SIMD value. // TODO: This method is needed until we introduce SIMD as proper type. static bool ReturnsSIMDValue(HInstruction* instruction) { @@ -193,12 +198,14 @@ class HVecOperation : public HVariableInputSizeInstruction { // Abstraction of a unary vector operation. class HVecUnaryOperation : public HVecOperation { public: - HVecUnaryOperation(ArenaAllocator* allocator, + HVecUnaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* input, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 1, @@ -218,13 +225,15 @@ class HVecUnaryOperation : public HVecOperation { // Abstraction of a binary vector operation. class HVecBinaryOperation : public HVecOperation { public: - HVecBinaryOperation(ArenaAllocator* allocator, + HVecBinaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* left, HInstruction* right, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 2, @@ -247,13 +256,15 @@ class HVecBinaryOperation : public HVecOperation { // The Android runtime guarantees elements have at least natural alignment. class HVecMemoryOperation : public HVecOperation { public: - HVecMemoryOperation(ArenaAllocator* allocator, + HVecMemoryOperation(InstructionKind kind, + ArenaAllocator* allocator, DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, side_effects, number_of_inputs, @@ -286,6 +297,8 @@ class HVecMemoryOperation : public HVecOperation { }; // Packed type consistency checker ("same vector length" integral types may mix freely). +// Tests relaxed type consistency in which packed same-size integral types can co-exist, +// but other type mixes are an error. 
inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type type) { if (input->IsPhi()) { return input->GetType() == HVecOperation::kSIMDType; // carries SIMD @@ -310,7 +323,8 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, scalar, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation( + kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) { DCHECK(!scalar->IsVecOperation()); } @@ -336,7 +350,8 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { size_t vector_length, size_t index, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation( + kVecExtractScalar, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); DCHECK_LT(index, vector_length); DCHECK_EQ(index, 0u); @@ -374,7 +389,7 @@ class HVecReduce FINAL : public HVecUnaryOperation { size_t vector_length, ReductionKind kind, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc), + : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc), kind_(kind) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -407,7 +422,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecCnv, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); DCHECK_NE(GetInputType(), GetResultType()); // actual convert } @@ -432,7 +447,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecNeg, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -454,7 +469,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecAbs, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -476,7 +491,7 @@ class HVecNot FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecNot, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); } @@ -502,7 +517,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -518,7 +533,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. 
, (xn + yn ) >> 1 ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecHalvingAdd FINAL : public HVecBinaryOperation { public: HVecHalvingAdd(ArenaAllocator* allocator, @@ -527,21 +542,14 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, bool is_rounded, - bool is_unsigned, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; + : HVecBinaryOperation( + kVecHalvingAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned); SetPackedFlag<kFieldHAddIsRounded>(is_rounded); } - bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); } bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } bool CanBeMoved() const OVERRIDE { return true; } @@ -549,9 +557,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { DCHECK(other->IsVecHalvingAdd()); const HVecHalvingAdd* o = other->AsVecHalvingAdd(); - return HVecOperation::InstructionDataEquals(o) && - IsUnsigned() == o->IsUnsigned() && - IsRounded() == o->IsRounded(); + return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded(); } DECLARE_INSTRUCTION(VecHalvingAdd); @@ -561,8 +567,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { private: // Additional packed bits. 
- static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1; + static constexpr size_t kFieldHAddIsRounded = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1; static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; @@ -577,7 +582,7 @@ class HVecSub FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecSub, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -600,7 +605,7 @@ class HVecMul FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecMul, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -623,7 +628,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecDiv, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -638,7 +643,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { // Takes minimum of every component in the two vectors, // viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMin FINAL : public HVecBinaryOperation { public: HVecMin(ArenaAllocator* allocator, @@ -646,44 +651,23 @@ class HVecMin FINAL : public HVecBinaryOperation { HInstruction* right, DataType::Type packed_type, size_t vector_length, - bool is_unsigned, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; + : HVecBinaryOperation(kVecMin, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMin()); - const HVecMin* o = other->AsVecMin(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMin); protected: DEFAULT_COPY_CONSTRUCTOR(VecMin); - - private: - // Additional packed bits. 
- static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; - static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; // Takes maximum of every component in the two vectors, // viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMax FINAL : public HVecBinaryOperation { public: HVecMax(ArenaAllocator* allocator, @@ -691,39 +675,18 @@ class HVecMax FINAL : public HVecBinaryOperation { HInstruction* right, DataType::Type packed_type, size_t vector_length, - bool is_unsigned, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; + : HVecBinaryOperation(kVecMax, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMax()); - const HVecMax* o = other->AsVecMax(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMax); protected: DEFAULT_COPY_CONSTRUCTOR(VecMax); - - private: - // Additional packed bits. 
- static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; - static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; // Bitwise-ands every component in the two vectors, @@ -736,7 +699,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecAnd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -758,7 +721,8 @@ class HVecAndNot FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation( + kVecAndNot, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -780,7 +744,7 @@ class HVecOr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecOr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -802,7 +766,7 @@ class HVecXor FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecXor, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -824,7 +788,7 @@ class HVecShl FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecShl, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -846,7 +810,7 @@ class HVecShr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -868,7 +832,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecUShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -895,7 +859,8 @@ class HVecSetScalars FINAL : public HVecOperation { size_t vector_length, size_t number_of_scalars, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecSetScalars, + allocator, packed_type, SideEffects::None(), number_of_scalars, @@ -929,7 +894,8 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecMultiplyAccumulate, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 3, @@ -979,7 +945,8 @@ 
class HVecSADAccumulate FINAL : public HVecOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecSADAccumulate, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 3, @@ -1013,7 +980,8 @@ class HVecLoad FINAL : public HVecMemoryOperation { size_t vector_length, bool is_string_char_at, uint32_t dex_pc) - : HVecMemoryOperation(allocator, + : HVecMemoryOperation(kVecLoad, + allocator, packed_type, side_effects, /* number_of_inputs */ 2, @@ -1058,7 +1026,8 @@ class HVecStore FINAL : public HVecMemoryOperation { SideEffects side_effects, size_t vector_length, uint32_t dex_pc) - : HVecMemoryOperation(allocator, + : HVecMemoryOperation(kVecStore, + allocator, packed_type, side_effects, /* number_of_inputs */ 3, diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index ab9d7594d9..af13449646 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -282,143 +282,53 @@ TEST_F(NodesVectorTest, VectorAlignmentMattersOnStore) { EXPECT_FALSE(v0->Equals(v1)); // no longer equal } -TEST_F(NodesVectorTest, VectorSignMattersOnMin) { - HVecOperation* p0 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); - HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); - HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); - - HVecMin* v0 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc); - HVecMin* v1 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v2 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc); - HVecMin* v3 = new (GetAllocator()) HVecMin( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v4 = new (GetAllocator()) HVecMin( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v5 = new (GetAllocator()) HVecMin( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v6 = new (GetAllocator()) HVecMin( - GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMin* min_insns[] = { v0, v1, v2, v3, v4, v5, v6 }; - - EXPECT_FALSE(p0->CanBeMoved()); - EXPECT_FALSE(p1->CanBeMoved()); - EXPECT_FALSE(p2->CanBeMoved()); - - for (HVecMin* min_insn : min_insns) { - EXPECT_TRUE(min_insn->CanBeMoved()); - } - - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. 
- EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_FALSE(v1->IsUnsigned()); - EXPECT_TRUE(v2->IsUnsigned()); - - for (HVecMin* min_insn1 : min_insns) { - for (HVecMin* min_insn2 : min_insns) { - EXPECT_EQ(min_insn1 == min_insn2, min_insn1->Equals(min_insn2)); - } - } -} - -TEST_F(NodesVectorTest, VectorSignMattersOnMax) { - HVecOperation* p0 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); - HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); - HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); - - HVecMax* v0 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc); - HVecMax* v1 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v2 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc); - HVecMax* v3 = new (GetAllocator()) HVecMax( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v4 = new (GetAllocator()) HVecMax( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v5 = new (GetAllocator()) HVecMax( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v6 = new (GetAllocator()) HVecMax( - GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMax* max_insns[] = { v0, v1, v2, v3, v4, v5, v6 }; - - EXPECT_FALSE(p0->CanBeMoved()); - EXPECT_FALSE(p1->CanBeMoved()); - EXPECT_FALSE(p2->CanBeMoved()); - - for (HVecMax* max_insn : max_insns) { - EXPECT_TRUE(max_insn->CanBeMoved()); - } - - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. 
- EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_FALSE(v1->IsUnsigned()); - EXPECT_TRUE(v2->IsUnsigned()); - - for (HVecMax* max_insn1 : max_insns) { - for (HVecMax* max_insn2 : max_insns) { - EXPECT_EQ(max_insn1 == max_insn2, max_insn1->Equals(max_insn2)); - } - } -} - TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { + HVecOperation* u0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kUint32, 4, kNoDexPc); + HVecOperation* u1 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kUint16, 8, kNoDexPc); + HVecOperation* u2 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kUint8, 16, kNoDexPc); + HVecOperation* p0 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 8, kNoDexPc); HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 16, kNoDexPc); HVecHalvingAdd* v0 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v1 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ false, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v2 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v3 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v4 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, - /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v5 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v6 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v7 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v8 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), 
u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v9 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v10 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v11 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kInt16, 2, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); - HVecHalvingAdd* v12 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kInt16, 2, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); - HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 }; + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 }; + + EXPECT_FALSE(u0->CanBeMoved()); + EXPECT_FALSE(u1->CanBeMoved()); + EXPECT_FALSE(u2->CanBeMoved()); EXPECT_FALSE(p0->CanBeMoved()); EXPECT_FALSE(p1->CanBeMoved()); EXPECT_FALSE(p2->CanBeMoved()); @@ -427,26 +337,18 @@ TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { EXPECT_TRUE(hadd_insn->CanBeMoved()); } - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. - EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_TRUE(v1->IsUnsigned()); - EXPECT_TRUE(!v2->IsUnsigned()); - EXPECT_TRUE(!v3->IsUnsigned()); - EXPECT_TRUE(v4->IsUnsigned()); - EXPECT_TRUE(v0->IsRounded()); EXPECT_TRUE(!v1->IsRounded()); EXPECT_TRUE(v2->IsRounded()); EXPECT_TRUE(!v3->IsRounded()); EXPECT_TRUE(v4->IsRounded()); - EXPECT_TRUE(v5->IsRounded()); - EXPECT_TRUE(!v6->IsRounded()); - EXPECT_TRUE(v7->IsRounded()); - EXPECT_TRUE(!v8->IsRounded()); - EXPECT_TRUE(v9->IsRounded()); - EXPECT_TRUE(!v10->IsRounded()); - EXPECT_TRUE(v11->IsRounded()); - EXPECT_TRUE(!v12->IsRounded()); + EXPECT_TRUE(!v5->IsRounded()); + EXPECT_TRUE(v6->IsRounded()); + EXPECT_TRUE(!v7->IsRounded()); + EXPECT_TRUE(v8->IsRounded()); + EXPECT_TRUE(!v9->IsRounded()); + EXPECT_TRUE(v10->IsRounded()); + EXPECT_TRUE(!v11->IsRounded()); for (HVecHalvingAdd* hadd_insn1 : hadd_insns) { for (HVecHalvingAdd* hadd_insn2 : hadd_insns) { diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 6326065fe2..4c32be7d15 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -24,7 +24,11 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. 
HX86ComputeBaseMethodAddress() - : HExpression(DataType::Type::kInt32, SideEffects::None(), kNoDexPc) {} + : HExpression(kX86ComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -39,7 +43,10 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, HConstant* constant) - : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) { + : HExpression(kX86LoadFromConstantTable, + constant->GetType(), + SideEffects::None(), + kNoDexPc) { SetRawInputAt(0, method_base); SetRawInputAt(1, constant); } @@ -65,7 +72,7 @@ class HX86FPNeg FINAL : public HExpression<2> { HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + : HExpression(kX86FPNeg, result_type, SideEffects::None(), dex_pc) { DCHECK(DataType::IsFloatingPointType(result_type)); SetRawInputAt(0, input); SetRawInputAt(1, method_base); @@ -89,7 +96,7 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c35c490118..e42dfc10ba 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -382,7 +382,8 @@ class OptimizingCompiler FINAL : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info); + void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + REQUIRES_SHARED(Locks::mutator_lock_); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -1248,7 +1249,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - GenerateJitDebugInfo(info); + GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1372,7 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? 
nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - GenerateJitDebugInfo(info); + GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1396,7 +1397,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } -void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) { +void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); DCHECK(compiler_options.GenerateAnyDebugInfo()); @@ -1409,9 +1410,13 @@ void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) { GetCompilerDriver()->GetInstructionSetFeatures(), mini_debug_info, ArrayRef<const debug::MethodDebugInfo>(&info, 1)); - MutexLock mu(Thread::Current(), g_jit_debug_mutex); - JITCodeEntry* entry = CreateJITCodeEntry(elf_file); - IncrementJITCodeEntryRefcount(entry, info.code_address); + MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); + AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file); + + VLOG(jit) + << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) + << " size=" << PrettySize(elf_file.size()) + << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage()); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index a6a2f46d2e..00194ff1fe 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -22,9 +22,9 @@ #include <string> #include <type_traits> -#include "atomic.h" +#include "base/atomic.h" +#include "base/globals.h" #include "base/logging.h" // For VLOG_IS_ON. -#include "globals.h" namespace art { @@ -99,7 +99,6 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, - kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274a..2591783cb6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -238,6 +238,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " + "to IR nodes by instruction simplifier"; + UNREACHABLE(); case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 59733397bf..f843c008d8 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -34,20 +34,6 @@ void PrepareForRegisterAllocation::Run() { } } -void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { - // Record only those bitstring type checks that make it to the codegen stage. 
- if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { - MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); - } -} - -void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { - // Record only those bitstring type checks that make it to the codegen stage. - if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { - MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); - } -} - void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index f6e4d3ef99..2c64f016c1 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,8 +40,6 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE; - void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE; void VisitNullCheck(HNullCheck* check) OVERRIDE; void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 178d7fd0f0..67a61fc01d 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -87,7 +87,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; - void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; void VisitLoadString(HLoadString* instr) OVERRIDE; void VisitLoadException(HLoadException* instr) OVERRIDE; @@ -172,12 +171,6 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); } - } else if (instr->IsInstanceOf()) { - HInstanceOf* iof = instr->AsInstanceOf(); - DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); - } else if (instr->IsCheckCast()) { - HCheckCast* check = instr->AsCheckCast(); - DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -506,7 +499,8 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. return; @@ -543,14 +537,13 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst Thread* self = Thread::Current(); StackHandleScope<2> hs(self); const DexFile& dex_file = *invoke->GetTargetMethod().dex_file; + uint32_t dex_method_index = invoke->GetTargetMethod().index; Handle<mirror::DexCache> dex_cache( hs.NewHandle(FindDexCacheWithHint(self, dex_file, hint_dex_cache_))); - // Use a null loader. We should probably use the compiling method's class loader, - // but then we would need to pass it to RTPVisitor just for this debug check. 
Since - // the method is from the String class, the null loader is good enough. + // Use a null loader, the target method is in a boot classpath dex file. Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - invoke->GetDexMethodIndex(), dex_cache, loader, /* referrer */ nullptr, kDirect); + dex_method_index, dex_cache, loader, /* referrer */ nullptr, kDirect); DCHECK(method != nullptr); mirror::Class* declaring_class = method->GetDeclaringClass(); DCHECK(declaring_class != nullptr); @@ -650,20 +643,15 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - if (IsAdmissible(instr->GetClass().Get())) { - instr->SetValidLoadedClassRTI(); + Handle<mirror::Class> resolved_class = instr->GetClass(); + if (IsAdmissible(resolved_class.Get())) { + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( + resolved_class, /* is_exact */ true)); } instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); } -void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { - ScopedObjectAccess soa(Thread::Current()); - if (IsAdmissible(instr->GetClass().Get())) { - instr->SetValidTargetClassRTI(); - } -} - void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } @@ -731,6 +719,8 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -739,14 +729,12 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> klass = check_cast->GetClass(); - if (IsAdmissible(klass.Get())) { + if (class_rti.IsValid()) { DCHECK(is_first_run_); - check_cast->SetValidTargetClassRTI(); + ScopedObjectAccess soa(Thread::Current()); // This is the first run of RTP and class is resolved. - bool is_exact = klass->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), + bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), /* CheckCast succeeds for nulls. */ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. 
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 1d3fe0334d..27f9ac3990 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -103,6 +103,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint case DataType::Type::kFloat64: slot += long_spill_slots; FALLTHROUGH_INTENDED; + case DataType::Type::kUint64: case DataType::Type::kInt64: slot += float_spill_slots; FALLTHROUGH_INTENDED; @@ -110,6 +111,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint slot += int_spill_slots; FALLTHROUGH_INTENDED; case DataType::Type::kReference: + case DataType::Type::kUint32: case DataType::Type::kInt32: case DataType::Type::kUint16: case DataType::Type::kUint8: diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index ad5248e982..fa7ad82316 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1972,6 +1972,8 @@ void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* case DataType::Type::kInt16: int_intervals.push_back(parent); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); UNREACHABLE(); diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index cfe63bd758..216fb57a96 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1131,6 +1131,8 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { case DataType::Type::kInt16: spill_slots = &int_spill_slots_; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b56..bca538fb17 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -667,7 +667,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. 
if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index dffef17587..b65628e441 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -125,8 +125,14 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, BootImageAOTCanEmbedMethod(callee, compiler_driver)) { method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (IsInBootImage(callee)) { + // Use PC-relative access to the .data.bimg.rel.ro methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; + uint8_t* begin = Runtime::Current()->GetHeap()->GetBootImageSpaces().front()->Begin(); + method_load_data = reinterpret_cast<uintptr_t>(callee) - reinterpret_cast<uintptr_t>(begin); + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the .bss methods arrays. + // Use PC-relative access to the .bss methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -236,75 +242,6 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } -static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(!klass->IsProxyClass()); - DCHECK(!klass->IsArrayClass()); - - if (Runtime::Current()->UseJitCompilation()) { - // If we're JITting, try to assign a type check bitstring (fall through). - } else if (codegen->GetCompilerOptions().IsBootImage()) { - const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); - if (!compiler_driver->IsImageClass(descriptor)) { - return false; - } - // If the target is a boot image class, try to assign a type check bitstring (fall through). - // (If --force-determinism, this was already done; repeating is OK and yields the same result.) - } else { - // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring - // already assigned in the boot image. - return false; - } - - // Try to assign a type check bitstring. - MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); - if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() - // and we're hitting an unassigned bitstring in dex2oat_image_test. 
b/26687569 - kIsDebugBuild && - codegen->GetCompilerOptions().IsBootImage() && - codegen->GetCompilerOptions().IsForceDeterminism()) { - SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); - CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) - << klass->PrettyDescriptor() << "/" << old_state - << " in " << codegen->GetGraph()->PrettyMethod(); - } - SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); - return state == SubtypeCheckInfo::kAssigned; -} - -TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - bool needs_access_check) { - if (klass == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (klass->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (klass->IsArrayClass()) { - if (klass->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (klass->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. - return TypeCheckKind::kExactCheck; - } else if (kUseBitstringTypeCheck && - !needs_access_check && - CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { - // TODO: We should not need the `!needs_access_check` check but getting rid of that - // requires rewriting some optimizations in instruction simplifier. - return TypeCheckKind::kBitstringCheck; - } else if (klass->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index fa3e948eeb..6df7d6d91e 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -44,10 +44,12 @@ class HSharpening : public HOptimization { static constexpr const char* kSharpeningPassName = "sharpening"; - // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, @@ -56,19 +58,10 @@ class HSharpening : public HOptimization { const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by the builder. - static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - bool needs_access_check) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by Sharpening and InstructionSimplifier. + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); private: CodeGenerator* codegen_; |
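Taken together, the nodes_vector.h and nodes_vector_test.cc changes above move signedness out of a separate is_unsigned flag and into the packed type itself (kUint8, kUint16, kUint32, kUint64), while every concrete node now forwards its InstructionKind to the base constructor. A minimal sketch of the resulting construction pattern, mirroring the test code above; GetAllocator(), int8_parameter_ and kNoDexPc are assumed to come from the NodesVectorTest fixture:

// Old form (removed): signedness passed as a trailing flag next to is_rounded.
//   new (GetAllocator()) HVecHalvingAdd(GetAllocator(), p1, p1, DataType::Type::kInt8, 16,
//                                       /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);

// New form: an unsigned operation simply uses an unsigned packed type.
HVecOperation* u2 = new (GetAllocator()) HVecReplicateScalar(
    GetAllocator(), int8_parameter_, DataType::Type::kUint8, 16, kNoDexPc);
HVecHalvingAdd* hadd = new (GetAllocator()) HVecHalvingAdd(
    GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ true, kNoDexPc);

// HVecMin/HVecMax lose their flag entirely; the packed type alone decides signedness.
HVecMin* umin = new (GetAllocator()) HVecMin(
    GetAllocator(), u2, u2, DataType::Type::kUint8, 16, kNoDexPc);

Callers that still start from a boolean can use the new HVecOperation::ToProperType(type, is_unsigned) helper, which maps an integral type to its same-size signed or unsigned counterpart.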