diff options
Diffstat (limited to 'compiler/optimizing')
40 files changed, 2711 insertions, 2837 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 01155dcd37..ff59173c8b 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -43,6 +43,7 @@ #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" #include "base/casts.h" +#include "base/leb128.h" #include "bytecode_utils.h" #include "class_linker.h" #include "compiled_method.h" @@ -52,7 +53,6 @@ #include "graph_visualizer.h" #include "intern_table.h" #include "intrinsics.h" -#include "leb128.h" #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object_reference.h" diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 3c5a37f958..60de722285 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -618,14 +618,18 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { protected: // Patch info used for recording locations of required linker patches and their targets, - // i.e. target method, string, type or code identified by their dex file and index. + // i.e. target method, string, type or code identified by their dex file and index, + // or .data.bimg.rel.ro entries identified by the boot image offset. template <typename LabelType> struct PatchInfo { - PatchInfo(const DexFile& target_dex_file, uint32_t target_index) - : dex_file(target_dex_file), index(target_index) { } - - const DexFile& dex_file; - uint32_t index; + PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } + + // Target dex file or null for .data.bimg.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index. + uint32_t offset_or_index; + // Label for the instruction to patch. 
LabelType label; }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3fd88e3e18..b0ddd8e8c6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -78,6 +78,7 @@ using helpers::OutputFPRegister; using helpers::OutputRegister; using helpers::QRegisterFrom; using helpers::RegisterFrom; +using helpers::SRegisterFrom; using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; @@ -1395,11 +1396,11 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), @@ -4447,11 +4448,11 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); // Add ADRP with its PC-relative method patch. 
- vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add ADD with its PC-relative method patch. vixl::aarch64::Label* add_label = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } @@ -4559,51 +4560,47 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - adrp_label, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - adrp_label, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); } vixl::aarch64::Label* 
CodeGeneratorARM64::NewBssEntryTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { @@ -4612,7 +4609,7 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t cust } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -4679,7 +4676,7 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { linker_patches->push_back(Factory(info.label.GetLocation(), - &info.target_dex_file, + info.target_dex_file, info.pc_insn_label->GetLocation(), info.offset_or_index)); } @@ -4688,27 +4685,27 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( void 
CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4876,11 +4873,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative type patch. 
const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } @@ -4897,11 +4894,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative type patch. const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative type patch. vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); // Extract the reference from the slot data, i.e. clear the hash bits. int32_t masked_hash = ClassTable::TableSlot::MaskHash( @@ -5044,11 +5041,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String patch. 
const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; } @@ -5064,11 +5061,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative String patch. vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5466,6 +5463,159 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +// TODO: integrate with HandleBinaryOp? 
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + Register op1_reg; + Register op2_reg; + Register out_reg; + if (type == DataType::Type::kInt64) { + op1_reg = XRegisterFrom(op1); + op2_reg = XRegisterFrom(op2); + out_reg = XRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + op1_reg = WRegisterFrom(op1); + op2_reg = WRegisterFrom(op2); + out_reg = WRegisterFrom(out); + } + + __ Cmp(op1_reg, op2_reg); + __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + FPRegister op1_reg; + FPRegister op2_reg; + FPRegister out_reg; + if (type == DataType::Type::kFloat64) { + op1_reg = DRegisterFrom(op1); + op2_reg = DRegisterFrom(op2); + out_reg = DRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + op1_reg = SRegisterFrom(op1); + op2_reg = SRegisterFrom(op2); + out_reg = SRegisterFrom(out); + } + + if (is_min) { + __ Fmin(out_reg, op1_reg, op2_reg); + } else { + __ Fmax(out_reg, op1_reg, op2_reg); + } +} + +void LocationsBuilderARM64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +// TODO: integrate with HandleBinaryOp? +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void 
LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f92c94fda7..70f5500016 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -273,6 +273,9 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void 
GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -565,8 +568,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry method patch for an instruction and return // the label to be bound before the instruction. The instruction will be @@ -579,9 +582,9 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the @@ -595,9 +598,9 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). 
- vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the @@ -777,17 +780,12 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. The only difference is the interpretation of the - // offset_or_index. - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) - : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { } + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. 
- uint32_t offset_or_index; - vixl::aarch64::Label label; vixl::aarch64::Label* pc_insn_label; }; @@ -798,7 +796,7 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t custom_data; }; - vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file, + vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches); @@ -826,15 +824,15 @@ class CodeGeneratorARM64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 6d49b32dbc..4fef027e6d 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2354,11 +2354,11 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), @@ -4690,6 +4690,309 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + 
locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. 
+ return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* min_max, bool is_min) { + LocationSummary* locations = min_max->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. 
+ return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(min_max, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. + __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* min_max, bool is_min) { + LocationSummary* locations = min_max->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. 
+ return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(min_max, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. + __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
+ + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + GenerateMinMax(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(min, /*is_min*/ true); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(min, /*is_min*/ true); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + GenerateMinMax(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(max, /*is_min*/ false); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(max, /*is_min*/ false); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + 
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -7131,7 +7434,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - 
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); break; } @@ -7146,7 +7449,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); // Extract the reference from the slot data, i.e. clear the hash bits. @@ -7293,7 +7596,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); return; } @@ -7307,7 +7610,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); __ Ldr(out, MemOperand(out, /* offset */ 0)); return; @@ -8893,7 +9196,7 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { 
DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod()); vixl32::Register temp_reg = RegisterFrom(temp); EmitMovwMovtPlaceholder(labels, temp_reg); break; @@ -8998,42 +9301,40 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &boot_image_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &method_bss_entry_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, &type_bss_entry_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( 
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( - const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); return &patches->back(); } @@ -9075,45 +9376,45 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.add_pc_label.IsBound()); uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation()); // Add MOVW patch. DCHECK(info.movw_label.IsBound()); uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation()); - linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index)); // Add MOVT patch. 
DCHECK(info.movt_label.IsBound()); uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation()); - linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index)); } } void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); 
EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 38570bb0fe..726a2f9030 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -349,6 +349,11 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* min_max, bool is_min); + void GenerateMinMaxDouble(HInstruction* min_max, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -552,32 +557,34 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. The only difference is the interpretation of the - // offset_or_index. The PC-relative address is loaded with three instructions, + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The PC-relative address is loaded with three instructions, // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. 
Though we // currently emit these 3 instructions together, instruction scheduling could // split this sequence apart, so we keep separate labels for each of them. struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx) { } PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. + // Target dex file or null for .data.bimg.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index. uint32_t offset_or_index; vixl::aarch32::Label movw_label; vixl::aarch32::Label movt_label; vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); @@ -774,7 +781,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { }; VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const 
DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> @@ -797,15 +804,15 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 97604b38a1..ae42bbcc70 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1017,11 +1017,11 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1583,7 +1583,7 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); @@ -1593,33 +1593,33 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( uint32_t pc_rel_offset = info_high.pc_rel_label.IsBound() ? 
__ GetLabelLocation(&info_high.pc_rel_label) : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1630,54 +1630,51 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, 
linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return 
NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1932,8 +1929,8 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ LoadConst32(AT, shifted_initialized_value); - __ Bltu(TMP, AT, slow_path->GetEntryLabel()); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnez(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. __ Sync(0); __ Bind(slow_path->GetExitLabel()); @@ -7638,10 +7635,6 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - // Set the hidden argument. 
- __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); @@ -7666,6 +7659,9 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); + // Set the hidden argument. + __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), + invoke->GetDexMethodIndex()); // T9(); __ Jalr(T9); __ NopIfNoReordering(); @@ -7828,9 +7824,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* info_high = NewBootImageMethodPatch(invoke->GetTargetMethod()); PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); @@ -8046,9 +8042,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + 
codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8072,9 +8068,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8241,9 +8237,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8266,9 +8262,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), 
load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); @@ -8783,6 +8779,508 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. 
One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. 
+ } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, 
DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
+ __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? 
a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, isR6, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, isR6, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, isR6, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, isR6, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case 
DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. 
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". 
+ // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 32b3e4221f..ae5fe5be19 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -246,6 +246,10 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMax(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -576,8 +580,9 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings. The only difference is the interpretation of the offset_or_index. + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. 
+ // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -592,21 +597,14 @@ class CodeGeneratorMIPS : public CodeGenerator { // ... // sw r2, low(r1) // patch // b back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<MipsLabel> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<MipsLabel>(dex_file, off_or_idx), pc_rel_label(), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. - uint32_t offset_or_index; - // Label for the instruction to patch. - MipsLabel label; // Label for the instruction corresponding to PC+0. Not bound or used in low half patches. // Not bound in high half patches on R2 when using HMipsComputeBaseMethodAddress. 
// Bound in high half patches on R2 when using the NAL instruction instead of @@ -621,19 +619,19 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); @@ -675,7 +673,7 @@ class CodeGeneratorMIPS : public CodeGenerator { using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); @@ -696,15 +694,15 @@ class CodeGeneratorMIPS : public 
CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index f1bb5c15eb..8031cca7cb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -962,11 +962,11 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1499,39 +1499,39 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); const PcRelativePatchInfo& info_high = info.patch_info_high ? *info.patch_info_high : info; uint32_t pc_rel_offset = __ GetLabelLocation(&info_high.label); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( - pc_relative_method_patches_, linker_patches); + boot_image_method_patches_, linker_patches); 
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); + DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - pc_relative_type_patches_, linker_patches); + boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - pc_relative_string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1542,54 +1542,51 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( 
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1776,8 +1773,8 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo 
enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); - __ LoadConst32(AT, shifted_initialized_value); - __ Bltuc(TMP, AT, slow_path->GetEntryLabel()); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnezc(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. __ Sync(0); __ Bind(slow_path->GetExitLabel()); @@ -5917,9 +5914,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + NewBootImageMethodPatch(invoke->GetTargetMethod()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; @@ -6099,9 +6096,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); break; @@ -6119,9 +6116,9 @@ void 
InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); // Extract the reference from the slot data, i.e. clear the hash bits. @@ -6235,9 +6232,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); return; @@ -6254,9 +6251,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + 
codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Lwu(out, AT, /* placeholder */ 0x5678); return; @@ -6668,6 +6665,242 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). 
The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? 
out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << 
"Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index d479410f07..5d925d5d5a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -242,6 +242,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) 
@@ -555,9 +558,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays, - // boot image strings and method calls. The only difference is the interpretation of - // the offset_or_index. + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -571,20 +574,13 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // ... // sw r2, low(r1) // patch // bc back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<Mips64Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<Mips64Label>(dex_file, off_or_idx), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type/method index. - uint32_t offset_or_index; - // Label for the instruction to patch. - Mips64Label label; // Pointer to the info for the high half patch or nullptr if this is the high half patch info. 
const PcRelativePatchInfo* patch_info_high; @@ -593,19 +589,19 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high = nullptr); @@ -639,7 +635,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); @@ -663,15 +659,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // address. 
Uint64ToLiteralMap uint64_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5fede80bc7..536909aa1f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -1028,7 +1031,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -3802,6 +3805,307 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
+ locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. 
+ Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. 
+ if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void 
LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. 
+ break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). 
+ __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4528,7 +4832,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: @@ -4538,10 +4842,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); - // Bind a new fixup label at the end of the "movl" insn. 
- __ Bind(NewMethodBssEntryPatch( - invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -4598,56 +4899,55 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { +void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); - boot_image_method_patches_.emplace_back(address, - *invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().index); + boot_image_method_patches_.emplace_back( + method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86::NewMethodBssEntryPatch( - HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method) { +void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); // Add the patch entry and bind its label at the end of the instruction. 
- method_bss_entry_patches_.emplace_back(method_address, - *target_method.dex_file, - target_method.index); - return &method_bss_entry_patches_.back().label; + method_bss_entry_patches_.emplace_back( + method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); } -void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); - boot_image_type_patches_.emplace_back(address, - load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) { + HX86ComputeBaseMethodAddress* method_address = + load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_type_patches_.emplace_back( + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); type_bss_entry_patches_.emplace_back( - address, load_class->GetDexFile(), load_class->GetTypeIndex().index_); + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { + HX86ComputeBaseMethodAddress* method_address = + load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_string_patches_.emplace_back( + 
method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_bss_entry_patches_.emplace_back( - address, load_string->GetDexFile(), load_string->GetStringIndex().index_); + method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } @@ -4661,8 +4961,10 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( ArenaVector<linker::LinkerPatch>* linker_patches) { for (const X86PcRelativePatchInfo& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(Factory( - literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index)); + linker_patches->push_back(Factory(literal_offset, + info.target_dex_file, + GetMethodAddressOffset(info.method_address), + info.offset_or_index)); } } @@ -4673,7 +4975,7 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4682,13 +4984,13 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } else { 
DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -6118,7 +6420,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, Handle<mirror::Class> handle) { ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. - jit_class_patches_.emplace_back(dex_file, type_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -6160,7 +6462,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -6175,7 +6477,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); // Extract the reference from the slot data, i.e. clear the hash bits. 
int32_t masked_hash = ClassTable::TableSlot::MaskHash( ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); @@ -6307,7 +6609,7 @@ Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. - jit_string_patches_.emplace_back(dex_file, string_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -6324,7 +6626,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBootImageAddress: { @@ -6338,7 +6640,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBssEntry: { @@ -7830,13 +8132,13 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - StringReference string_reference(&info.dex_file, dex::StringIndex(info.index)); + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, 
index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - TypeReference type_reference(&info.dex_file, dex::TypeIndex(info.index)); + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0082853184..82496d12e5 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -225,6 +225,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -414,12 +416,11 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex 
string_index, @@ -610,7 +611,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: struct X86PcRelativePatchInfo : PatchInfo<Label> { X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address, - const DexFile& target_dex_file, + const DexFile* target_dex_file, uint32_t target_index) : PatchInfo(target_dex_file, target_index), method_address(address) {} @@ -641,7 +642,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; // String patch locations; type depends on configuration (intern table or boot image PIC). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; + ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; // String patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ae35ab5983..bb1fbc5290 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -993,7 +993,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( DCHECK(GetCompilerOptions().IsBootImage()); __ leal(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); @@ -1001,9 +1001,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - // Bind a new fixup label at the end of the "movl" insn. 
- __ Bind(NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -1061,38 +1059,39 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { - boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().index); +void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back( + invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) { - // Add a patch entry and return the label. - method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.index); - return &method_bss_entry_patches_.back().label; +void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); } -void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { - boot_image_type_patches_.emplace_back(load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) { + boot_image_type_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { - type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); + type_bss_entry_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); 
return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { + boot_image_string_patches_.emplace_back( + &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); string_bss_entry_patches_.emplace_back( - load_string->GetDexFile(), load_string->GetStringIndex().index_); + &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } @@ -1107,7 +1106,7 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( for (const PatchInfo<Label>& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back( - Factory(literal_offset, &info.dex_file, info.label.Position(), info.index)); + Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index)); } } @@ -1118,7 +1117,7 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size() + + boot_image_string_patches_.size() + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -1127,13 +1126,13 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( - string_patches_, linker_patches); + 
boot_image_string_patches_, linker_patches); } else { DCHECK(boot_image_method_patches_.empty()); EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - string_patches_, linker_patches); + boot_image_string_patches_, linker_patches); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1231,7 +1230,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -3822,6 +3821,247 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. 
It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? 
op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
+ __ Bind(&done); +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void 
InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. + __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -5529,7 +5769,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, Handle<mirror::Class> handle) { ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. 
- jit_class_patches_.emplace_back(dex_file, type_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -5570,7 +5810,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; case HLoadClass::LoadKind::kBootImageAddress: { DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -5583,7 +5823,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S case HLoadClass::LoadKind::kBootImageClassTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); // Extract the reference from the slot data, i.e. clear the hash bits. int32_t masked_hash = ClassTable::TableSlot::MaskHash( ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); @@ -5694,7 +5934,7 @@ Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. 
- jit_string_patches_.emplace_back(dex_file, string_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -5710,7 +5950,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBootImageAddress: { @@ -5723,7 +5963,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageInternTable: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootStringPatch(load); + codegen_->RecordBootImageStringPatch(load); return; } case HLoadString::LoadKind::kBssEntry: { @@ -7160,13 +7400,13 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - StringReference string_reference(&info.dex_file, dex::StringIndex(info.index)); + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - TypeReference type_reference(&info.dex_file, dex::TypeIndex(info.index)); + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } diff 
--git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e86123ef01..933afdab26 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -222,6 +222,9 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -410,11 +413,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, @@ -613,7 +616,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; // String patch locations; type depends on configuration (intern table or boot image PIC). - ArenaDeque<PatchInfo<Label>> string_patches_; + ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; // String patch locations for kBssEntry. 
ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index ba4040acad..a0fd5ffcb1 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -18,6 +18,7 @@ #include <memory> #include "base/macros.h" +#include "base/utils.h" #include "builder.h" #include "codegen_test_utils.h" #include "dex/dex_file.h" @@ -26,7 +27,6 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator_linear_scan.h" -#include "utils.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/mips/managed_register_mips.h" diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h index e2cf7a80fe..94807e8fc9 100644 --- a/compiler/optimizing/data_type-inl.h +++ b/compiler/optimizing/data_type-inl.h @@ -18,7 +18,7 @@ #define ART_COMPILER_OPTIMIZING_DATA_TYPE_INL_H_ #include "data_type.h" -#include "primitive.h" +#include "dex/primitive.h" namespace art { diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc index ca137b7c7c..8fea22bce8 100644 --- a/compiler/optimizing/data_type_test.cc +++ b/compiler/optimizing/data_type_test.cc @@ -20,7 +20,7 @@ #include "base/array_ref.h" #include "base/macros.h" -#include "primitive.h" +#include "dex/primitive.h" namespace art { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index bbf8c26d59..5ff31cead5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -27,6 +27,7 @@ #include "code_generator.h" #include "data_type-inl.h" #include "dead_code_elimination.h" +#include "dex/descriptors_names.h" #include "disassembler.h" #include "inliner.h" #include "licm.h" diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 71c394ec1f..f05159b735 100644 --- a/compiler/optimizing/gvn.cc +++ 
b/compiler/optimizing/gvn.cc @@ -17,11 +17,11 @@ #include "gvn.h" #include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -#include "base/bit_vector-inl.h" +#include "base/utils.h" #include "side_effects_analysis.h" -#include "utils.h" namespace art { diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec11240..0a310ca940 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. + return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. 
*/ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. + return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -365,11 +351,11 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { HInductionVarAnalysis::InductionInfo *trip = induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); + IsConstant(trip->op_a, kExact, trip_count); return true; } return false; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d..34837700a2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -120,6 +120,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void 
SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; CompilerDriver* compiler_driver_; @@ -850,34 +853,10 @@ static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? 
Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, x->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; } // Returns true if operands a and b consists of widening type conversions @@ -2430,6 +2409,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2513,6 +2513,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + 
SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index acb830e524..f8dc316e45 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -18,6 +18,7 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/utils.h" #include "class_linker.h" #include "dex/invoke_type.h" #include "driver/compiler_driver.h" @@ -26,7 +27,6 @@ #include "nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" -#include "utils.h" namespace art { diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7..1035cbc2c4 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -266,6 +266,18 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ << " should 
have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 2f8e33f941..81c0b50932 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { - Location in = 
locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? 
XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? 
DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = 
locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 830d0403e4..e61a0b0809 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -432,341 +432,6 @@ void 
IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = 
RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? 
gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. - __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location 
op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? 
ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index cafa5228d9..d108c43a4c 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -742,574 +742,6 @@ void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); } -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). 
- // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = 
locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? 
out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. 
- if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, 
float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. 
If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. 
- } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, 
DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - // double java.lang.Math.sqrt(double) void 
IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 89f1818be2..9987d05fb3 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -470,290 +470,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = 
locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void 
IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void 
IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - // double java.lang.Math.sqrt(double) void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 46b7f3f1ce..c4f322bf0c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. 
- LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. 
-} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. 
- if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. 
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void 
IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6483b7cb2a..437bc3dd3c 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. 
- if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. 
- __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. 
- if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
- locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? 
op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, 
kIntrinsified); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 899496328e..d3b081e005 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -334,29 +334,14 @@ static bool IsAddConst(HInstruction* instruction, // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) // x = min(x_phi, ..) +// x = max(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { - if (reduction->IsAdd()) { + if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -1297,80 +1282,59 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. 
- HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp(instruction, - vector_map_->Get(r), - nullptr, - HVecOperation::ToProperType(type, is_unsigned)); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. 
- HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; + return true; + } + } else if (instruction->IsMin() || instruction->IsMax()) { + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. 
+ DCHECK(r != nullptr); + DCHECK(s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } - default: - return false; - } // switch + return true; + } } return false; } @@ -1811,83 +1775,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - vector = new (global_allocator_) - HVecMin(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its 
operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. - DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kMin: + GENERATE_VEC( + new (global_allocator_) HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMin(org_type, opa, opb, dex_pc)); + case HInstruction::kMax: + GENERATE_VEC( + new (global_allocator_) HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMax(org_type, opa, opb, dex_pc)); + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1998,9 +1908,7 @@ bool 
HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { + if (v->GetType() == reduction_type && v->IsAbs()) { HInstruction* x = v->InputAt(0); if (x->GetType() == reduction_type) { int64_t c = 0; @@ -2054,14 +1962,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 91e475d737..f6ba19f22a 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1121,10 +1121,6 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node); } -HInstruction::InstructionKind HInstruction::GetKind() const { - return GetKindInternal(); -} - HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f91d37b3ac..9da46206da 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,6 +26,7 @@ #include "base/arena_object.h" #include 
"base/array_ref.h" #include "base/iteration_range.h" +#include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" #include "data_type.h" @@ -1337,6 +1338,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1382,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadException, Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1518,7 +1522,6 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) private: \ H##type& operator=(const H##type&) = delete; \ public: \ - InstructionKind GetKindInternal() const OVERRIDE { return k##type; } \ const char* DebugName() const OVERRIDE { return #type; } \ bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \ return other->Is##type(); \ @@ -1952,7 +1955,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: - HInstruction(SideEffects side_effects, uint32_t dex_pc) +#define DECLARE_KIND(type, super) k##type, + enum InstructionKind { + FOR_EACH_INSTRUCTION(DECLARE_KIND) + kLastInstructionKind + }; +#undef DECLARE_KIND + + HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -1966,16 +1976,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { lifetime_position_(kNoLifetime), side_effects_(side_effects), reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) { + SetPackedField<InstructionKindField>(kind); 
SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact()); } virtual ~HInstruction() {} -#define DECLARE_KIND(type, super) k##type, - enum InstructionKind { - FOR_EACH_INSTRUCTION(DECLARE_KIND) - }; -#undef DECLARE_KIND HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } @@ -2279,8 +2285,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide // the virtual function because the __attribute__((__pure__)) doesn't really // apply the strong requirement for virtual functions, preventing optimizations. - InstructionKind GetKind() const PURE; - virtual InstructionKind GetKindInternal() const = 0; + InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); } virtual size_t ComputeHashCode() const { size_t result = GetKind(); @@ -2332,9 +2337,16 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // its users. Used by liveness analysis to compute use positions accordingly. 
static constexpr size_t kFlagEmittedAtUseSite = 0u; static constexpr size_t kFlagReferenceTypeIsExact = kFlagEmittedAtUseSite + 1; - static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKindSize = + MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1)); + static constexpr size_t kNumberOfGenericPackedBits = + kFieldInstructionKind + kFieldInstructionKindSize; static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; + static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits, + "Too many generic packed fields"); + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const { return GetInputRecords()[i]; } @@ -2391,9 +2403,13 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { live_interval_(nullptr), lifetime_position_(kNoLifetime), side_effects_(other.side_effects_), - reference_type_handle_(other.reference_type_handle_) {} + reference_type_handle_(other.reference_type_handle_) { + } private: + using InstructionKindField = + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); for (auto use_node = uses_.begin(); use_node != fixup_end; ++use_node) { @@ -2569,12 +2585,13 @@ class HVariableInputSizeInstruction : public HInstruction { void RemoveAllInputs(); protected: - HVariableInputSizeInstruction(SideEffects side_effects, + HVariableInputSizeInstruction(InstructionKind inst_kind, + SideEffects side_effects, uint32_t dex_pc, ArenaAllocator* allocator, size_t number_of_inputs, ArenaAllocKind kind) - : HInstruction(side_effects, dex_pc), + : HInstruction(inst_kind, side_effects, dex_pc), inputs_(number_of_inputs, allocator->Adapter(kind)) {} 
DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction); @@ -2585,8 +2602,8 @@ class HVariableInputSizeInstruction : public HInstruction { template<size_t N> class HTemplateInstruction: public HInstruction { public: - HTemplateInstruction<N>(SideEffects side_effects, uint32_t dex_pc) - : HInstruction(side_effects, dex_pc), inputs_() {} + HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc), inputs_() {} virtual ~HTemplateInstruction() {} using HInstruction::GetInputRecords; // Keep the const version visible. @@ -2607,8 +2624,8 @@ class HTemplateInstruction: public HInstruction { template<> class HTemplateInstruction<0>: public HInstruction { public: - explicit HTemplateInstruction<0>(SideEffects side_effects, uint32_t dex_pc) - : HInstruction(side_effects, dex_pc) {} + explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc) {} virtual ~HTemplateInstruction() {} @@ -2627,8 +2644,12 @@ class HTemplateInstruction<0>: public HInstruction { template<intptr_t N> class HExpression : public HTemplateInstruction<N> { public: - HExpression<N>(DataType::Type type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction<N>(side_effects, dex_pc) { + using HInstruction::InstructionKind; + HExpression<N>(InstructionKind kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc) + : HTemplateInstruction<N>(kind, side_effects, dex_pc) { this->template SetPackedField<TypeField>(type); } virtual ~HExpression() {} @@ -2653,7 +2674,8 @@ class HExpression : public HTemplateInstruction<N> { class HReturnVoid FINAL : public HTemplateInstruction<0> { public: explicit HReturnVoid(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) {} + : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } @@ -2668,7 
+2690,7 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> { class HReturn FINAL : public HTemplateInstruction<1> { public: explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } @@ -2688,6 +2710,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { DataType::Type type, uint32_t dex_pc = kNoDexPc) : HVariableInputSizeInstruction( + kPhi, SideEffects::None(), dex_pc, allocator, @@ -2788,7 +2811,9 @@ class HPhi FINAL : public HVariableInputSizeInstruction { // exit block. class HExit FINAL : public HTemplateInstruction<0> { public: - explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HExit(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } @@ -2801,7 +2826,9 @@ class HExit FINAL : public HTemplateInstruction<0> { // Jumps from one block to another. 
class HGoto FINAL : public HTemplateInstruction<0> { public: - explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HGoto(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) { + } bool IsClonable() const OVERRIDE { return true; } bool IsControlFlow() const OVERRIDE { return true; } @@ -2818,8 +2845,9 @@ class HGoto FINAL : public HTemplateInstruction<0> { class HConstant : public HExpression<0> { public: - explicit HConstant(DataType::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + explicit HConstant(InstructionKind kind, DataType::Type type, uint32_t dex_pc = kNoDexPc) + : HExpression(kind, type, SideEffects::None(), dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -2860,7 +2888,8 @@ class HNullConstant FINAL : public HConstant { private: explicit HNullConstant(uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kReference, dex_pc) {} + : HConstant(kNullConstant, DataType::Type::kReference, dex_pc) { + } friend class HGraph; }; @@ -2899,9 +2928,12 @@ class HIntConstant FINAL : public HConstant { private: explicit HIntConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt32, dex_pc), value_(value) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), value_(value) { + } explicit HIntConstant(bool value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt32, dex_pc), value_(value ? 1 : 0) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), + value_(value ? 
1 : 0) { + } const int32_t value_; @@ -2935,7 +2967,9 @@ class HLongConstant FINAL : public HConstant { private: explicit HLongConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kInt64, dex_pc), value_(value) {} + : HConstant(kLongConstant, DataType::Type::kInt64, dex_pc), + value_(value) { + } const int64_t value_; @@ -2986,9 +3020,13 @@ class HFloatConstant FINAL : public HConstant { private: explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat32, dex_pc), value_(value) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(value) { + } explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat32, dex_pc), value_(bit_cast<float, int32_t>(value)) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(bit_cast<float, int32_t>(value)) { + } const float value_; @@ -3039,9 +3077,13 @@ class HDoubleConstant FINAL : public HConstant { private: explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat64, dex_pc), value_(value) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(value) { + } explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(DataType::Type::kFloat64, dex_pc), value_(bit_cast<double, int64_t>(value)) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(bit_cast<double, int64_t>(value)) { + } const double value_; @@ -3055,7 +3097,7 @@ class HDoubleConstant FINAL : public HConstant { class HIf FINAL : public HTemplateInstruction<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } @@ -3091,7 +3133,7 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; explicit HTryBoundary(BoundaryKind kind, 
uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) { SetPackedField<BoundaryKindField>(kind); } @@ -3150,6 +3192,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::All(), dex_pc, allocator, @@ -3173,6 +3216,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::CanTriggerGC(), dex_pc, allocator, @@ -3241,7 +3285,12 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { // CHA guards are only optimized in a separate pass and it has no side effects // with regard to other passes. HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc) - : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, allocator, 0, kArenaAllocCHA) { + : HVariableInputSizeInstruction(kShouldDeoptimizeFlag, + SideEffects::None(), + dex_pc, + allocator, + 0, + kArenaAllocCHA) { } DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; } @@ -3264,7 +3313,8 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { class HCurrentMethod FINAL : public HExpression<0> { public: explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + : HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) { + } DECLARE_INSTRUCTION(CurrentMethod); @@ -3286,7 +3336,7 @@ class HClassTableGet FINAL : public HExpression<1> { TableKind kind, size_t index, uint32_t dex_pc) - : HExpression(type, SideEffects::None(), dex_pc), + : HExpression(kClassTableGet, type, SideEffects::None(), dex_pc), index_(index) { SetPackedField<TableKindField>(kind); SetRawInputAt(0, cls); @@ -3329,7 +3379,7 @@ class HPackedSwitch FINAL : public 
HTemplateInstruction<1> { uint32_t num_entries, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -3359,8 +3409,11 @@ class HPackedSwitch FINAL : public HTemplateInstruction<1> { class HUnaryOperation : public HExpression<1> { public: - HUnaryOperation(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + HUnaryOperation(InstructionKind kind, + DataType::Type result_type, + HInstruction* input, + uint32_t dex_pc = kNoDexPc) + : HExpression(kind, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } @@ -3394,12 +3447,13 @@ class HUnaryOperation : public HExpression<1> { class HBinaryOperation : public HExpression<2> { public: - HBinaryOperation(DataType::Type result_type, + HBinaryOperation(InstructionKind kind, + DataType::Type result_type, HInstruction* left, HInstruction* right, SideEffects side_effects = SideEffects::None(), uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, side_effects, dex_pc) { + : HExpression(kind, result_type, side_effects, dex_pc) { SetRawInputAt(0, left); SetRawInputAt(1, right); } @@ -3498,8 +3552,16 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs); class HCondition : public HBinaryOperation { public: - HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(DataType::Type::kBool, first, second, SideEffects::None(), dex_pc) { + HCondition(InstructionKind kind, + HInstruction* first, + HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kind, + DataType::Type::kBool, + first, + second, + SideEffects::None(), + dex_pc) { SetPackedField<ComparisonBiasField>(ComparisonBias::kNoBias); } @@ -3579,7 +3641,8 @@ class HCondition : public 
HBinaryOperation { class HEqual FINAL : public HCondition { public: HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3623,8 +3686,10 @@ class HEqual FINAL : public HCondition { class HNotEqual FINAL : public HCondition { public: - HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HNotEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kNotEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3667,8 +3732,10 @@ class HNotEqual FINAL : public HCondition { class HLessThan FINAL : public HCondition { public: - HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThan(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3705,8 +3772,10 @@ class HLessThan FINAL : public HCondition { class HLessThanOrEqual FINAL : public HCondition { public: - HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThanOrEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3744,7 +3813,8 @@ class HLessThanOrEqual FINAL : public HCondition { class HGreaterThan FINAL : public HCondition { public: HGreaterThan(HInstruction* first, HInstruction* 
second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3782,7 +3852,8 @@ class HGreaterThan FINAL : public HCondition { class HGreaterThanOrEqual FINAL : public HCondition { public: HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3820,7 +3891,8 @@ class HGreaterThanOrEqual FINAL : public HCondition { class HBelow FINAL : public HCondition { public: HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelow, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3861,7 +3933,8 @@ class HBelow FINAL : public HCondition { class HBelowOrEqual FINAL : public HCondition { public: HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelowOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3902,7 +3975,8 @@ class HBelowOrEqual FINAL : public HCondition { class HAbove FINAL : public HCondition { public: HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAbove, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, 
HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3943,7 +4017,8 @@ class HAbove FINAL : public HCondition { class HAboveOrEqual FINAL : public HCondition { public: HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAboveOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3993,7 +4068,8 @@ class HCompare FINAL : public HBinaryOperation { HInstruction* second, ComparisonBias bias, uint32_t dex_pc) - : HBinaryOperation(DataType::Type::kInt32, + : HBinaryOperation(kCompare, + DataType::Type::kInt32, first, second, SideEffectsForArchRuntimeCalls(comparison_type), @@ -4079,7 +4155,10 @@ class HNewInstance FINAL : public HExpression<1> { const DexFile& dex_file, bool finalizable, QuickEntrypointEnum entrypoint) - : HExpression(DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc), + : HExpression(kNewInstance, + DataType::Type::kReference, + SideEffects::CanTriggerGC(), + dex_pc), type_index_(type_index), dex_file_(dex_file), entrypoint_(entrypoint) { @@ -4235,7 +4314,8 @@ class HInvoke : public HVariableInputSizeInstruction { using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; - HInvoke(ArenaAllocator* allocator, + HInvoke(InstructionKind kind, + ArenaAllocator* allocator, uint32_t number_of_arguments, uint32_t number_of_other_inputs, DataType::Type return_type, @@ -4244,6 +4324,7 @@ class HInvoke : public HVariableInputSizeInstruction { ArtMethod* resolved_method, InvokeType invoke_type) : HVariableInputSizeInstruction( + kind, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
dex_pc, allocator, @@ -4278,7 +4359,8 @@ class HInvokeUnresolved FINAL : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(allocator, + : HInvoke(kInvokeUnresolved, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, @@ -4303,14 +4385,16 @@ class HInvokePolymorphic FINAL : public HInvoke { DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index) - : HInvoke(allocator, + : HInvoke(kInvokePolymorphic, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, dex_pc, dex_method_index, nullptr, - kVirtual) {} + kVirtual) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4387,7 +4471,8 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { InvokeType invoke_type, MethodReference target_method, ClinitCheckRequirement clinit_check_requirement) - : HInvoke(allocator, + : HInvoke(kInvokeStaticOrDirect, + allocator, number_of_arguments, // There is potentially one extra argument for the HCurrentMethod node, and // potentially one other if the clinit check is explicit, and potentially @@ -4582,7 +4667,8 @@ class HInvokeVirtual FINAL : public HInvoke { uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t vtable_index) - : HInvoke(allocator, + : HInvoke(kInvokeVirtual, + allocator, number_of_arguments, 0u, return_type, @@ -4590,7 +4676,8 @@ class HInvokeVirtual FINAL : public HInvoke { dex_method_index, resolved_method, kVirtual), - vtable_index_(vtable_index) {} + vtable_index_(vtable_index) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4633,7 +4720,8 @@ class HInvokeInterface FINAL : public HInvoke { uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t imt_index) - : HInvoke(allocator, + : HInvoke(kInvokeInterface, + allocator, number_of_arguments, 0u, return_type, @@ -4641,7 +4729,8 @@ class HInvokeInterface FINAL : public HInvoke { dex_method_index, resolved_method, kInterface), - imt_index_(imt_index) {} + 
imt_index_(imt_index) { + } bool IsClonable() const OVERRIDE { return true; } @@ -4670,7 +4759,7 @@ class HInvokeInterface FINAL : public HInvoke { class HNeg FINAL : public HUnaryOperation { public: HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) { + : HUnaryOperation(kNeg, result_type, input, dex_pc) { DCHECK_EQ(result_type, DataType::Kind(input->GetType())); } @@ -4698,7 +4787,7 @@ class HNeg FINAL : public HUnaryOperation { class HNewArray FINAL : public HExpression<2> { public: HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc) - : HExpression(DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cls); SetRawInputAt(1, length); } @@ -4734,7 +4823,8 @@ class HAdd FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4769,7 +4859,8 @@ class HSub FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kSub, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> static T Compute(T x, T y) { return x - y; } @@ -4802,7 +4893,8 @@ class HMul FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4837,7 +4929,8 @@ class HDiv FINAL : public 
HBinaryOperation {
        HInstruction* left,
        HInstruction* right,
        uint32_t dex_pc)
-      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
+      : HBinaryOperation(kDiv, result_type, left, right, SideEffects::None(), dex_pc) {
+  }
 
   template <typename T>
   T ComputeIntegral(T x, T y) const {
@@ -4884,7 +4977,8 @@ class HRem FINAL : public HBinaryOperation {
        HInstruction* left,
        HInstruction* right,
        uint32_t dex_pc)
-      : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
+      : HBinaryOperation(kRem, result_type, left, right, SideEffects::None(), dex_pc) {
+  }
 
   template <typename T>
   T ComputeIntegral(T x, T y) const {
@@ -4925,12 +5019,123 @@ class HRem FINAL : public HBinaryOperation {
   DEFAULT_COPY_CONSTRUCTOR(Rem);
 };
 
+class HMin FINAL : public HBinaryOperation {
+ public:
+  HMin(DataType::Type result_type,
+       HInstruction* left,
+       HInstruction* right,
+       uint32_t dex_pc)
+      : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+  bool IsCommutative() const OVERRIDE { return true; }
+
+  // Evaluation for integral values.
+  template <typename T> static T ComputeIntegral(T x, T y) {
+    return (x <= y) ? x : y;
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // TODO: Evaluation for floating-point values.
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+  DECLARE_INSTRUCTION(Min);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(Min);
+};
+
+class HMax FINAL : public HBinaryOperation {
+ public:
+  HMax(DataType::Type result_type,
+       HInstruction* left,
+       HInstruction* right,
+       uint32_t dex_pc)
+      : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+  bool IsCommutative() const OVERRIDE { return true; }
+
+  // Evaluation for integral values.
+  template <typename T> static T ComputeIntegral(T x, T y) {
+    return (x >= y) ? x : y;
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  // TODO: Evaluation for floating-point values.
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+  DECLARE_INSTRUCTION(Max);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(Max);
+};
+
+class HAbs FINAL : public HUnaryOperation {
+ public:
+  HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
+      : HUnaryOperation(kAbs, result_type, input, dex_pc) {}
+
+  // Integral abs. Negates via uint64_t so abs(MIN) wraps to MIN (as Java's Math.abs) without UB.
+  template <typename T> static T ComputeIntegral(T x) {
+    return x < 0 ? static_cast<T>(uint64_t{0} - static_cast<uint64_t>(x)) : x;
+  }
+
+  // Evaluation for floating-point values.
+  // Note, as a "quality of implementation", rather than pure "spec compliance",
+  // we require that Math.abs() clears the sign bit (but changes nothing else)
+  // for all floating-point numbers, including NaN (signaling NaN may become quiet though).
+  // http://b/30758343
+  template <typename T, typename S> static T ComputeFP(T x) {
+    S bits = bit_cast<S, T>(x);
+    return bit_cast<T, S>(bits & std::numeric_limits<S>::max());
+  }
+
+  HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetFloatConstant(
+        ComputeFP<float, int32_t>(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetDoubleConstant(
+        ComputeFP<double, int64_t>(x->GetValue()), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Abs);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(Abs);
+};
+
 class HDivZeroCheck FINAL : public HExpression<1> {
  public:
   // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
   // constructor.
HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4957,7 +5162,7 @@ class HShl FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kShl, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5003,7 +5208,7 @@ class HShr FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kShr, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5049,7 +5254,7 @@ class HUShr FINAL : public HBinaryOperation { HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { + : HBinaryOperation(kUShr, result_type, value, distance, SideEffects::None(), dex_pc) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5097,7 +5302,8 @@ class HAnd FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5134,7 +5340,8 @@ class HOr FINAL : public HBinaryOperation { 
HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5171,7 +5378,8 @@ class HXor FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5205,7 +5413,7 @@ class HXor FINAL : public HBinaryOperation { class HRor FINAL : public HBinaryOperation { public: HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance) - : HBinaryOperation(result_type, value, distance) { + : HBinaryOperation(kRor, result_type, value, distance) { DCHECK_EQ(result_type, DataType::Kind(value->GetType())); DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } @@ -5262,7 +5470,7 @@ class HParameterValue FINAL : public HExpression<0> { uint8_t index, DataType::Type parameter_type, bool is_this = false) - : HExpression(parameter_type, SideEffects::None(), kNoDexPc), + : HExpression(kParameterValue, parameter_type, SideEffects::None(), kNoDexPc), dex_file_(dex_file), type_index_(type_index), index_(index) { @@ -5301,7 +5509,8 @@ class HParameterValue FINAL : public HExpression<0> { class HNot FINAL : public HUnaryOperation { public: HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) {} + : HUnaryOperation(kNot, result_type, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5334,7 +5543,8 @@ class HNot FINAL : public HUnaryOperation { class HBooleanNot FINAL : public 
HUnaryOperation { public: explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(DataType::Type::kBool, input, dex_pc) {} + : HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5372,7 +5582,7 @@ class HTypeConversion FINAL : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + : HExpression(kTypeConversion, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); // Invariant: We should never generate a conversion to a Boolean value. DCHECK_NE(DataType::Type::kBool, result_type); @@ -5404,7 +5614,7 @@ class HNullCheck FINAL : public HExpression<1> { // `HNullCheck` can trigger GC, as it may call the `NullPointerException` // constructor. 
HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -5474,7 +5684,10 @@ class HInstanceFieldGet FINAL : public HExpression<1> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kInstanceFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5534,7 +5747,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kInstanceFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5586,7 +5801,8 @@ class HArrayGet FINAL : public HExpression<2> { type, SideEffects::ArrayReadOfType(type), dex_pc, - /* is_string_char_at */ false) {} + /* is_string_char_at */ false) { + } HArrayGet(HInstruction* array, HInstruction* index, @@ -5594,7 +5810,7 @@ class HArrayGet FINAL : public HExpression<2> { SideEffects side_effects, uint32_t dex_pc, bool is_string_char_at) - : HExpression(type, side_effects, dex_pc) { + : HExpression(kArrayGet, type, side_effects, dex_pc) { SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, array); SetRawInputAt(1, index); @@ -5678,7 +5894,8 @@ class HArraySet FINAL : public HTemplateInstruction<3> { expected_component_type, // Make a best guess for side effects now, may be refined during SSA building. 
ComputeSideEffects(GetComponentType(value->GetType(), expected_component_type)), - dex_pc) {} + dex_pc) { + } HArraySet(HInstruction* array, HInstruction* index, @@ -5686,7 +5903,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { DataType::Type expected_component_type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction(side_effects, dex_pc) { + : HTemplateInstruction(kArraySet, side_effects, dex_pc) { SetPackedField<ExpectedComponentTypeField>(expected_component_type); SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); @@ -5786,7 +6003,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { class HArrayLength FINAL : public HExpression<1> { public: HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) { SetPackedFlag<kFlagIsStringLength>(is_string_length); // Note that arrays do not change length, so the instruction does not // depend on any write. 
@@ -5829,7 +6046,7 @@ class HBoundsCheck FINAL : public HExpression<2> { HInstruction* length, uint32_t dex_pc, bool is_string_char_at = false) - : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType())); SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, index); @@ -5862,7 +6079,9 @@ class HBoundsCheck FINAL : public HExpression<2> { class HSuspendCheck FINAL : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {} + : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), + slow_path_(nullptr) { + } bool IsClonable() const OVERRIDE { return true; } @@ -5889,7 +6108,8 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { class HNativeDebugInfo : public HTemplateInstruction<0> { public: explicit HNativeDebugInfo(uint32_t dex_pc) - : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {} + : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + } bool NeedsEnvironment() const OVERRIDE { return true; @@ -5947,7 +6167,7 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), @@ -6163,7 +6383,7 @@ class HLoadString FINAL : public HInstruction { dex::StringIndex string_index, const DexFile& dex_file, uint32_t dex_pc) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), 
string_index_(string_index), dex_file_(dex_file) { @@ -6300,6 +6520,7 @@ class HClinitCheck FINAL : public HExpression<1> { public: HClinitCheck(HLoadClass* constant, uint32_t dex_pc) : HExpression( + kClinitCheck, DataType::Type::kReference, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc) { @@ -6342,7 +6563,10 @@ class HStaticFieldGet FINAL : public HExpression<1> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kStaticFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6399,7 +6623,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kStaticFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6442,7 +6668,10 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedInstanceFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetRawInputAt(0, obj); } @@ -6470,7 +6699,9 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedInstanceFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), 
DataType::Kind(value->GetType())); @@ -6508,7 +6739,10 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { HUnresolvedStaticFieldGet(DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedStaticFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { } @@ -6534,7 +6768,9 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedStaticFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); @@ -6570,7 +6806,8 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { class HLoadException FINAL : public HExpression<0> { public: explicit HLoadException(uint32_t dex_pc = kNoDexPc) - : HExpression(DataType::Type::kReference, SideEffects::None(), dex_pc) {} + : HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) { + } bool CanBeNull() const OVERRIDE { return false; } @@ -6585,7 +6822,8 @@ class HLoadException FINAL : public HExpression<0> { class HClearException FINAL : public HTemplateInstruction<0> { public: explicit HClearException(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {} + : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) { + } DECLARE_INSTRUCTION(ClearException); @@ -6596,7 +6834,7 @@ class HClearException FINAL : public HTemplateInstruction<0> { class HThrow FINAL : public HTemplateInstruction<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + : 
HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, exception); } @@ -6637,7 +6875,8 @@ class HInstanceOf FINAL : public HExpression<2> { HLoadClass* target_class, TypeCheckKind check_kind, uint32_t dex_pc) - : HExpression(DataType::Type::kBool, + : HExpression(kInstanceOf, + DataType::Type::kBool, SideEffectsForArchRuntimeCalls(check_kind), dex_pc) { SetPackedField<TypeCheckKindField>(check_kind); @@ -6696,7 +6935,7 @@ class HInstanceOf FINAL : public HExpression<2> { class HBoundType FINAL : public HExpression<1> { public: explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(DataType::Type::kReference, SideEffects::None(), dex_pc), + : HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc), upper_bound_(ReferenceTypeInfo::CreateInvalid()) { SetPackedFlag<kFlagUpperCanBeNull>(true); SetPackedFlag<kFlagCanBeNull>(true); @@ -6746,7 +6985,7 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { HLoadClass* target_class, TypeCheckKind check_kind, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); SetRawInputAt(0, object); @@ -6823,7 +7062,9 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction( - SideEffects::AllWritesAndReads(), dex_pc) { // Assume write/read on all fields/arrays. + kMemoryBarrier, + SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. 
+ dex_pc) { SetPackedField<BarrierKindField>(barrier_kind); } @@ -6942,7 +7183,8 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // // If in a later phase we discover that there are no writes to reference final fields, // we can refine the side effect to a smaller set of type reads (see above constraints). - : HVariableInputSizeInstruction(SideEffects::AllReads(), + : HVariableInputSizeInstruction(kConstructorFence, + SideEffects::AllReads(), dex_pc, allocator, /* number_of_inputs */ 1, @@ -7009,6 +7251,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) : HTemplateInstruction( + kMonitorOperation, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc) { SetPackedField<OperationKindField>(kind); @@ -7050,7 +7293,7 @@ class HSelect FINAL : public HExpression<3> { HInstruction* true_value, HInstruction* false_value, uint32_t dex_pc) - : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { + : HExpression(kSelect, HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType())); // First input must be `true_value` or `false_value` to allow codegens to @@ -7163,7 +7406,7 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove FINAL : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc), moves_(allocator->Adapter(kArenaAllocMoveOperands)) { moves_.reserve(kDefaultNumberOfMoves); } @@ -7225,7 +7468,10 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { class HIntermediateAddress FINAL : public HExpression<2> { public: HIntermediateAddress(HInstruction* base_address, 
HInstruction* offset, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::DependsOnGC(), dex_pc) { + : HExpression(kIntermediateAddress, + DataType::Type::kInt32, + SideEffects::DependsOnGC(), + dex_pc) { DCHECK_EQ(DataType::Size(DataType::Type::kInt32), DataType::Size(DataType::Type::kReference)) << "kPrimInt and kPrimNot have different sizes."; diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index 2c0595e3d8..d0e0fef946 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -24,7 +24,11 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HMipsComputeBaseMethodAddress() - : HExpression(DataType::Type::kInt32, SideEffects::None(), kNoDexPc) {} + : HExpression(kMipsComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -42,7 +46,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HMipsComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -90,7 +94,10 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { public: HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kIntermediateArrayAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { SetRawInputAt(0, index); SetRawInputAt(1, shift); } diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index e837f1e7e0..29358e1141 100644 --- 
a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -32,7 +32,8 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { HInstruction* mul_left, HInstruction* mul_right, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + : HExpression(kMultiplyAccumulate, type, SideEffects::None(), dex_pc), + op_kind_(op) { SetRawInputAt(kInputAccumulatorIndex, accumulator); SetRawInputAt(kInputMulLeftIndex, mul_left); SetRawInputAt(kInputMulRightIndex, mul_right); @@ -68,7 +69,12 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc), + : HBinaryOperation(kBitwiseNegatedRight, + result_type, + left, + right, + SideEffects::None(), + dex_pc), op_kind_(op) { DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op; } @@ -143,7 +149,10 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { public: HIntermediateAddressIndex( HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) - : HExpression(DataType::Type::kInt32, SideEffects::None(), dex_pc) { + : HExpression(kIntermediateAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { SetRawInputAt(0, index); SetRawInputAt(1, offset); SetRawInputAt(2, shift); @@ -193,7 +202,7 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { // is an extension. int shift = 0, uint32_t dex_pc = kNoDexPc) - : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + : HExpression(kDataProcWithShifterOp, instr->GetType(), SideEffects::None(), dex_pc), instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift & (instr->GetType() == DataType::Type::kInt32 ? 
kMaxIntShiftDistance diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index ecabdf3b76..0d38d57375 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -71,13 +71,15 @@ class HVecOperation : public HVariableInputSizeInstruction { // TODO: we could introduce SIMD types in HIR. static constexpr DataType::Type kSIMDType = DataType::Type::kFloat64; - HVecOperation(ArenaAllocator* allocator, + HVecOperation(InstructionKind kind, + ArenaAllocator* allocator, DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVariableInputSizeInstruction(side_effects, + : HVariableInputSizeInstruction(kind, + side_effects, dex_pc, allocator, number_of_inputs, @@ -196,12 +198,14 @@ class HVecOperation : public HVariableInputSizeInstruction { // Abstraction of a unary vector operation. class HVecUnaryOperation : public HVecOperation { public: - HVecUnaryOperation(ArenaAllocator* allocator, + HVecUnaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* input, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 1, @@ -221,13 +225,15 @@ class HVecUnaryOperation : public HVecOperation { // Abstraction of a binary vector operation. class HVecBinaryOperation : public HVecOperation { public: - HVecBinaryOperation(ArenaAllocator* allocator, + HVecBinaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* left, HInstruction* right, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 2, @@ -250,13 +256,15 @@ class HVecBinaryOperation : public HVecOperation { // The Android runtime guarantees elements have at least natural alignment. 
class HVecMemoryOperation : public HVecOperation { public: - HVecMemoryOperation(ArenaAllocator* allocator, + HVecMemoryOperation(InstructionKind kind, + ArenaAllocator* allocator, DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kind, + allocator, packed_type, side_effects, number_of_inputs, @@ -315,7 +323,8 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, scalar, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation( + kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) { DCHECK(!scalar->IsVecOperation()); } @@ -341,7 +350,8 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { size_t vector_length, size_t index, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation( + kVecExtractScalar, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); DCHECK_LT(index, vector_length); DCHECK_EQ(index, 0u); @@ -379,7 +389,7 @@ class HVecReduce FINAL : public HVecUnaryOperation { size_t vector_length, ReductionKind kind, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc), + : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc), kind_(kind) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -412,7 +422,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecCnv, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); DCHECK_NE(GetInputType(), GetResultType()); // actual convert } @@ -437,7 +447,7 @@ class 
HVecNeg FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecNeg, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -459,7 +469,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecAbs, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -481,7 +491,7 @@ class HVecNot FINAL : public HVecUnaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) { + : HVecUnaryOperation(kVecNot, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); } @@ -507,7 +517,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -533,7 +543,8 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { size_t vector_length, bool is_rounded, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation( + kVecHalvingAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); SetPackedFlag<kFieldHAddIsRounded>(is_rounded); @@ -571,7 +582,7 @@ class HVecSub FINAL : public 
HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecSub, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -594,7 +605,7 @@ class HVecMul FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecMul, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -617,7 +628,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecDiv, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -641,7 +652,7 @@ class HVecMin FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecMin, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -665,7 +676,7 @@ class HVecMax FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecMax, allocator, left, right, packed_type, vector_length, dex_pc) { 
DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -688,7 +699,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecAnd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -710,7 +721,8 @@ class HVecAndNot FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation( + kVecAndNot, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -732,7 +744,7 @@ class HVecOr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecOr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -754,7 +766,7 @@ class HVecXor FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecXor, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -776,7 +788,7 @@ class HVecShl FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecShl, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, 
packed_type)); } @@ -798,7 +810,7 @@ class HVecShr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -820,7 +832,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { + : HVecBinaryOperation(kVecUShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -847,7 +859,8 @@ class HVecSetScalars FINAL : public HVecOperation { size_t vector_length, size_t number_of_scalars, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecSetScalars, + allocator, packed_type, SideEffects::None(), number_of_scalars, @@ -881,7 +894,8 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecMultiplyAccumulate, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 3, @@ -931,7 +945,8 @@ class HVecSADAccumulate FINAL : public HVecOperation { DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(allocator, + : HVecOperation(kVecSADAccumulate, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 3, @@ -965,7 +980,8 @@ class HVecLoad FINAL : public HVecMemoryOperation { size_t vector_length, bool is_string_char_at, uint32_t dex_pc) - : HVecMemoryOperation(allocator, + : HVecMemoryOperation(kVecLoad, + allocator, packed_type, side_effects, /* number_of_inputs */ 2, @@ -1010,7 +1026,8 @@ class HVecStore FINAL : public HVecMemoryOperation { SideEffects side_effects, size_t vector_length, 
uint32_t dex_pc) - : HVecMemoryOperation(allocator, + : HVecMemoryOperation(kVecStore, + allocator, packed_type, side_effects, /* number_of_inputs */ 3, diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 6326065fe2..4c32be7d15 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -24,7 +24,11 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HX86ComputeBaseMethodAddress() - : HExpression(DataType::Type::kInt32, SideEffects::None(), kNoDexPc) {} + : HExpression(kX86ComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -39,7 +43,10 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, HConstant* constant) - : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) { + : HExpression(kX86LoadFromConstantTable, + constant->GetType(), + SideEffects::None(), + kNoDexPc) { SetRawInputAt(0, method_base); SetRawInputAt(1, constant); } @@ -65,7 +72,7 @@ class HX86FPNeg FINAL : public HExpression<2> { HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + : HExpression(kX86FPNeg, result_type, SideEffects::None(), dex_pc) { DCHECK(DataType::IsFloatingPointType(result_type)); SetRawInputAt(0, input); SetRawInputAt(1, method_base); @@ -89,7 +96,7 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); diff --git 
a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b3f23a0dcd..e42dfc10ba 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1411,13 +1411,12 @@ void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDe mini_debug_info, ArrayRef<const debug::MethodDebugInfo>(&info, 1)); MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); - JITCodeEntry* entry = CreateJITCodeEntry(elf_file); - IncrementJITCodeEntryRefcount(entry, info.code_address); + AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file); VLOG(jit) << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) << " size=" << PrettySize(elf_file.size()) - << " total_size=" << PrettySize(GetJITCodeEntryMemUsage()); + << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage()); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 0023265e50..00194ff1fe 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -22,9 +22,9 @@ #include <string> #include <type_traits> -#include "atomic.h" +#include "base/atomic.h" +#include "base/globals.h" #include "base/logging.h" // For VLOG_IS_ON. 
-#include "globals.h" namespace art { diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274a..2591783cb6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -238,6 +238,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " + "to IR nodes by instruction simplifier"; + UNREACHABLE(); case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b56..bca538fb17 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -667,7 +667,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || |