diff options
Diffstat (limited to 'compiler')
48 files changed, 3718 insertions, 114 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index db338f0538..f2f4550f05 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -98,6 +98,8 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/gvn.cc \ optimizing/inliner.cc \ optimizing/instruction_simplifier.cc \ + optimizing/intrinsics.cc \ + optimizing/intrinsics_x86_64.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ optimizing/optimization.cc \ @@ -122,6 +124,8 @@ LIBART_COMPILER_SRC_FILES := \ utils/dwarf_cfi.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ + utils/mips64/assembler_mips64.cc \ + utils/mips64/managed_register_mips64.cc \ utils/x86/assembler_x86.cc \ utils/x86/managed_register_x86.cc \ utils/x86_64/assembler_x86_64.cc \ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 7df71f5b8a..96f8e0c355 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -164,7 +164,7 @@ void CommonCompilerTest::SetUp() { compiler_kind, instruction_set, instruction_set_features_.get(), true, new std::set<std::string>, nullptr, - 2, true, true, timer_.get(), -1, "")); + 2, true, true, "", timer_.get(), -1, "")); } // We typically don't generate an image in unit tests, disable this optimization by default. 
compiler_driver_->SetSupportBootImageFixup(false); diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 234e8b96f6..22be28c4d9 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -63,6 +63,7 @@ size_t CompiledCode::CodeDelta(InstructionSet instruction_set) { case kArm: case kArm64: case kMips: + case kMips64: case kX86: case kX86_64: return 0; @@ -82,6 +83,7 @@ const void* CompiledCode::CodePointer(const void* code_pointer, case kArm: case kArm64: case kMips: + case kMips64: case kX86: case kX86_64: return code_pointer; diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 30398527cd..84c0d93008 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -293,9 +293,9 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } } INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0), - INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0), + INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, kIntrinsicFlagToFloatingPoint), INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0), - INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0), + INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint), INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32), INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64), diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 9985d66469..56bed39bf1 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -339,7 +339,8 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, const InstructionSetFeatures* 
instruction_set_features, bool image, std::set<std::string>* image_classes, std::set<std::string>* compiled_classes, size_t thread_count, - bool dump_stats, bool dump_passes, CumulativeLogger* timer, + bool dump_stats, bool dump_passes, + const std::string& dump_cfg_file_name, CumulativeLogger* timer, int swap_fd, const std::string& profile_file) : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)), swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())), @@ -361,9 +362,10 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, stats_(new AOTCompilationStats), dump_stats_(dump_stats), dump_passes_(dump_passes), + dump_cfg_file_name_(dump_cfg_file_name), timings_logger_(timer), compiler_context_(nullptr), - support_boot_image_fixup_(instruction_set != kMips), + support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64), dedupe_code_("dedupe code", *swap_space_allocator_), dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_), dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_), @@ -2094,6 +2096,7 @@ static bool InstructionSetHasGenericJniStub(InstructionSet isa) { case kArm64: case kThumb2: case kMips: + case kMips64: case kX86: case kX86_64: return true; default: return false; diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 7ddc32cdd8..11b4329e32 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -97,6 +97,7 @@ class CompilerDriver { bool image, std::set<std::string>* image_classes, std::set<std::string>* compiled_classes, size_t thread_count, bool dump_stats, bool dump_passes, + const std::string& dump_cfg_file_name, CumulativeLogger* timer, int swap_fd, const std::string& profile_file); @@ -371,6 +372,10 @@ class CompilerDriver { return dump_passes_; } + const std::string& GetDumpCfgFileName() const { + return dump_cfg_file_name_; + } + CumulativeLogger* 
GetTimingsLogger() const { return timings_logger_; } @@ -542,6 +547,7 @@ class CompilerDriver { bool dump_stats_; const bool dump_passes_; + const std::string& dump_cfg_file_name_; CumulativeLogger* const timings_logger_; diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 273b62deee..94268de077 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -1108,6 +1108,14 @@ class ElfBuilder FINAL { EF_MIPS_ARCH_32R2); break; } + case kMips64: { + elf_header_.e_machine = EM_MIPS; + elf_header_.e_flags = (EF_MIPS_NOREORDER | + EF_MIPS_PIC | + EF_MIPS_CPIC | + EF_MIPS_ARCH_64R6); + break; + } default: { fatal_error_ = true; LOG(FATAL) << "Unknown instruction set: " << isa; diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 281e3fe109..f513ea8124 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -973,6 +973,9 @@ void Java_MyClassNatives_staticMethodThatShouldTakeClass(JNIEnv*, jclass, jclass } void JniCompilerTest::UpcallArgumentTypeChecking_InstanceImpl() { + // This will lead to error messages in the log. + ScopedLogSeverity sls(LogSeverity::FATAL); + SetUpForTest(false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V", reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldTakeClass)); @@ -985,6 +988,9 @@ void JniCompilerTest::UpcallArgumentTypeChecking_InstanceImpl() { JNI_TEST(UpcallArgumentTypeChecking_Instance) void JniCompilerTest::UpcallArgumentTypeChecking_StaticImpl() { + // This will lead to error messages in the log. + ScopedLogSeverity sls(LogSeverity::FATAL); + SetUpForTest(true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V", reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldTakeClass)); @@ -1475,6 +1481,9 @@ void JniCompilerTest::MaxParamNumberImpl() { JNI_TEST(MaxParamNumber) void JniCompilerTest::WithoutImplementationImpl() { + // This will lead to error messages in the log. 
+ ScopedLogSeverity sls(LogSeverity::FATAL); + SetUpForTest(false, "withoutImplementation", "()V", nullptr); env_->CallVoidMethod(jobj_, jmethod_); diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index b3ab370c99..d3d20555d8 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -92,7 +92,7 @@ TEST_F(OatTest, WriteRead) { method_inliner_map_.get(), compiler_kind, insn_set, insn_features.get(), false, nullptr, nullptr, 2, true, - true, timer_.get(), -1, "")); + true, "", timer_.get(), -1, "")); jobject class_loader = nullptr; if (kCompile) { TimingLogger timings2("OatTest::WriteRead", false, false); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f6ca6c740e..9c2facb75e 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, HInvoke* invoke = nullptr; if (optimized_invoke_type == kVirtual) { invoke = new (arena_) HInvokeVirtual( - arena_, number_of_arguments, return_type, dex_pc, table_index); + arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); } else if (optimized_invoke_type == kInterface) { invoke = new (arena_) HInvokeInterface( arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0c1ff9bff5..9e8907078b 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { + GetGraph()->GetTemporariesVRegSlots() + 1 /* filler */, 0, /* the baseline compiler does not have live registers at slow path */ + 0, /* the baseline compiler does not have live registers at slow path */ GetGraph()->GetMaximumNumberOfOutVRegs() + 1 /* current method */); GenerateFrameEntry(); @@ -136,14 +137,16 @@ size_t 
CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l } void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots) { first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize - + maximum_number_of_live_registers * GetWordSize() + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 8d28f3da25..88e50b6c88 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -109,9 +109,11 @@ class CodeGenerator { virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; + virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_registers, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots); virtual size_t FrameEntrySpillSize() const = 0; int32_t GetStackSlot(HLocal* local) const; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1862061bcf..c4ba0fd3e5 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1190,7 +1190,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); // temp = temp[index_in_cache] __ 
LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())); + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 8b29b159ab..267d9a2cef 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -169,6 +169,11 @@ class CodeGeneratorARM : public CodeGenerator { return kArmWordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in S registers, which are word sized. + return kArmWordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7b19f44e78..6d2c3de5d5 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1975,7 +1975,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir // Make sure that ArtMethod* is passed in W0 as per the calling convention DCHECK(temp.Is(w0)); size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() + - invoke->GetIndexInDexCache() * kHeapRefSize; + invoke->GetDexMethodIndex() * kHeapRefSize; // TODO: Implement all kinds of calls: // 1) boot -> boot diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index e4da07be43..590bc1d778 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -191,6 +191,11 @@ class CodeGeneratorARM64 : public CodeGenerator { return kArm64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // Allocated in D registers, which are word sized. 
+ return kArm64WordSize; + } + uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { vixl::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 04e36cc58a..1a0df44ea6 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1135,7 +1135,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec // temp = temp->dex_cache_resolved_methods_; __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); // (temp + offset_of_quick_compiled_code)() __ call(Address( temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index acde122917..2d8adb2cf1 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -166,6 +166,11 @@ class CodeGeneratorX86 : public CodeGenerator { return kX86WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + // 8 bytes == 2 words for each spill. 
+ return 2 * kX86WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5fc24f71e6..3d7f122d36 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -18,6 +18,8 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_x86_64.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> -class SlowPathCodeX86_64 : public SlowPathCode { - public: - SlowPathCodeX86_64() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); -}; - class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} @@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) { return kEqual; } +void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + CpuRegister temp) { + // All registers are assumed to be correctly set up. + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); + + DCHECK(!IsLeafMethod()); + RecordPcInfo(invoke, invoke->GetDexPc()); +} + void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); } @@ -1123,30 +1136,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } -void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorX86_64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); +void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->GenerateStaticOrDirectCall( + invoke, + invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -1182,10 +1196,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 87f6b0f779..c501568a89 100644 --- 
a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -36,6 +36,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); +static constexpr bool kCoalescedImplicitNullCheck = false; + class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeDexCallingConvention() : CallingConvention( @@ -67,7 +69,20 @@ class InvokeDexCallingConventionVisitor { }; class CodeGeneratorX86_64; -class SlowPathCodeX86_64; + +class SlowPathCodeX86_64 : public SlowPathCode { + public: + SlowPathCodeX86_64() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); +}; class ParallelMoveResolverX86_64 : public ParallelMoveResolver { public: @@ -169,6 +184,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { return kX86_64WordSize; } + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + return kX86_64WordSize; + } + size_t FrameEntrySpillSize() const OVERRIDE; HGraphVisitor* GetLocationBuilder() OVERRIDE { @@ -222,6 +241,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { return false; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 493d93f052..532167c179 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -44,10 +44,10 @@ void HInliner::Run() { instr_it.Advance()) { HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect(); if (current != nullptr) { - if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) { + if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) { if (kIsDebugBuild) { std::string callee_name = - PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile()); + PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); bool should_inline = callee_name.find("$inline$") != std::string::npos; CHECK(!should_inline) << "Could not inline " << callee_name; } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc new file mode 100644 index 0000000000..fe0e7f2eb2 --- /dev/null +++ b/compiler/optimizing/intrinsics.cc @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics.h" + +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "driver/compiler_driver.h" +#include "invoke_type.h" +#include "nodes.h" +#include "quick/inline_method_analyser.h" + +namespace art { + +// Function that returns whether an intrinsic is static/direct or virtual. +static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kInterface; // Non-sensical for intrinsic. +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + return IsStatic; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kInterface; +} + + + +static Primitive::Type GetType(uint64_t data, bool is_op_size) { + if (is_op_size) { + switch (static_cast<OpSize>(data)) { + case kSignedByte: + return Primitive::Type::kPrimByte; + case kSignedHalf: + return Primitive::Type::kPrimShort; + case k32: + return Primitive::Type::kPrimInt; + case k64: + return Primitive::Type::kPrimLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << data; + UNREACHABLE(); + } + } else { + if ((data & kIntrinsicFlagIsLong) != 0) { + return Primitive::Type::kPrimLong; + } + if ((data & kIntrinsicFlagIsObject) != 0) { + return Primitive::Type::kPrimNot; + } + return Primitive::Type::kPrimInt; + } +} + +static Intrinsics GetIntrinsic(InlineMethod method) { + switch (method.opcode) { + // Floating-point conversions. + case kIntrinsicDoubleCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble; + case kIntrinsicFloatCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat; + + // Bit manipulations. 
+ case kIntrinsicReverseBits: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverse; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverse; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicReverseBytes: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimShort: + return Intrinsics::kShortReverseBytes; + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverseBytes; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverseBytes; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Abs. + case kIntrinsicAbsDouble: + return Intrinsics::kMathAbsDouble; + case kIntrinsicAbsFloat: + return Intrinsics::kMathAbsFloat; + case kIntrinsicAbsInt: + return Intrinsics::kMathAbsInt; + case kIntrinsicAbsLong: + return Intrinsics::kMathAbsLong; + + // Min/max. + case kIntrinsicMinMaxDouble: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble; + case kIntrinsicMinMaxFloat: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat; + case kIntrinsicMinMaxInt: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt; + case kIntrinsicMinMaxLong: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; + + // Misc math. + case kIntrinsicSqrt: + return Intrinsics::kMathSqrt; + case kIntrinsicCeil: + return Intrinsics::kMathCeil; + case kIntrinsicFloor: + return Intrinsics::kMathFloor; + case kIntrinsicRint: + return Intrinsics::kMathRint; + case kIntrinsicRoundDouble: + return Intrinsics::kMathRoundDouble; + case kIntrinsicRoundFloat: + return Intrinsics::kMathRoundFloat; + + // System.arraycopy. 
+ case kIntrinsicSystemArrayCopyCharArray: + return Intrinsics::kSystemArrayCopyChar; + + // Thread.currentThread. + case kIntrinsicCurrentThread: + return Intrinsics::kThreadCurrentThread; + + // Memory.peek. + case kIntrinsicPeek: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPeekByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPeekShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPeekIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPeekLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Memory.poke. + case kIntrinsicPoke: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPokeByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPokeShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPokeIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPokeLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // String. + case kIntrinsicCharAt: + return Intrinsics::kStringCharAt; + case kIntrinsicCompareTo: + return Intrinsics::kStringCompareTo; + case kIntrinsicIsEmptyOrLength: + return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? + Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; + case kIntrinsicIndexOf: + return ((method.d.data & kIntrinsicFlagBase0) == 0) ? 
+ Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; + + case kIntrinsicCas: + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimNot: + return Intrinsics::kUnsafeCASObject; + case Primitive::Type::kPrimInt: + return Intrinsics::kUnsafeCASInt; + case Primitive::Type::kPrimLong: + return Intrinsics::kUnsafeCASLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicUnsafeGet: { + const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet; + case Primitive::Type::kPrimLong: + return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + case kIntrinsicUnsafePut: { + enum Sync { kNoSync, kVolatile, kOrdered }; + const Sync sync = + ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile : + ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? 
kOrdered : + kNoSync; + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePut; + case kVolatile: + return Intrinsics::kUnsafePutVolatile; + case kOrdered: + return Intrinsics::kUnsafePutOrdered; + } + break; + case Primitive::Type::kPrimLong: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutLong; + case kVolatile: + return Intrinsics::kUnsafePutLongVolatile; + case kOrdered: + return Intrinsics::kUnsafePutLongOrdered; + } + break; + case Primitive::Type::kPrimNot: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutObject; + case kVolatile: + return Intrinsics::kUnsafePutObjectVolatile; + case kOrdered: + return Intrinsics::kUnsafePutObjectOrdered; + } + break; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + + // Virtual cases. + + case kIntrinsicReferenceGetReferent: + return Intrinsics::kReferenceGetReferent; + + // Quick inliner cases. Remove after refactoring. They are here so that we can use the + // compiler to warn on missing cases. + + case kInlineOpNop: + case kInlineOpReturnArg: + case kInlineOpNonWideConst: + case kInlineOpIGet: + case kInlineOpIPut: + return Intrinsics::kNone; + + // No default case to make the compiler warn on missing cases. + } + return Intrinsics::kNone; +} + +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { + // The DexFileMethodInliner should have checked whether the methods are agreeing with + // what we expect, i.e., static methods are called as such. Add another check here for + // our expectations: + // Whenever the intrinsic is marked as static-or-direct, report an error if we find an + // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual + // functions that will perform a check inline. If the precise type is known, however, + // the instruction will be sharpened to an InvokeStaticOrDirect. 
+ InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); + InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? + invoke->AsInvokeStaticOrDirect()->GetInvokeType() : + invoke->IsInvokeVirtual() ? kVirtual : kSuper; + switch (intrinsic_type) { + case kStatic: + return (invoke_type == kStatic); + case kDirect: + return (invoke_type == kDirect); + case kVirtual: + // Call might be devirtualized. + return (invoke_type == kVirtual || invoke_type == kDirect); + + default: + return false; + } +} + +// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. +void IntrinsicsRecognizer::Run() { + DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_); + DCHECK(inliner != nullptr); + + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + inst_it.Advance()) { + HInstruction* inst = inst_it.Current(); + if (inst->IsInvoke()) { + HInvoke* invoke = inst->AsInvoke(); + InlineMethod method; + if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { + Intrinsics intrinsic = GetIntrinsic(method); + + if (intrinsic != Intrinsics::kNone) { + if (!CheckInvokeType(intrinsic, invoke)) { + LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " + << intrinsic << " for " + << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_); + } else { + invoke->SetIntrinsic(intrinsic); + } + } + } + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { + switch (intrinsic) { + case Intrinsics::kNone: + os << "No intrinsic."; + break; +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + os << # Name; \ + break; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef STATIC_INTRINSICS_LIST +#undef VIRTUAL_INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return os; +} + +} // namespace 
art
+
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
new file mode 100644
index 0000000000..29cc8efcc3
--- /dev/null
+++ b/compiler/optimizing/intrinsics.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+// Recognize intrinsics from HInvoke nodes.
+//
+// An HOptimization registered under the name "intrinsics_recognition". Run() is defined in
+// intrinsics.cc; the pass keeps the dex file and compiler driver it was constructed with.
+class IntrinsicsRecognizer : public HOptimization {
+ public:
+  IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+      : HOptimization(graph, true, "intrinsics_recognition"),
+        dex_file_(dex_file), driver_(driver) {}
+
+  void Run() OVERRIDE;
+
+ private:
+  const DexFile* dex_file_;
+  CompilerDriver* driver_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
+};
+
+// Base class for per-intrinsic visitors. Dispatch() routes an invoke to the Visit<Name>() method
+// matching invoke->GetIntrinsic(); every Visit<Name>() defaults to an empty body, so subclasses
+// override only the intrinsics they handle.
+class IntrinsicVisitor : public ValueObject {
+ public:
+  virtual ~IntrinsicVisitor() {}
+
+  // Dispatch logic.
+
+  // Calls Visit<Name>(invoke) for the intrinsic recorded on the invoke; does nothing for kNone.
+  void Dispatch(HInvoke* invoke) {
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kNone:
+        return;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+      case Intrinsics::k ## Name: \
+        Visit ## Name(invoke); \
+        return;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+      // Do not put a default case. That way the compiler will complain if we missed a case.
+    }
+  }
+
+  // Define visitor methods.
+
+  // One virtual, empty Visit<Name>(HInvoke*) per entry of INTRINSICS_LIST.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+  virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+  }
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ protected:
+  IntrinsicVisitor() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
new file mode 100644
index 0000000000..29ca20cca0
--- /dev/null
+++ b/compiler/optimizing/intrinsics_list.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+
+// All intrinsics supported by the optimizing compiler. Format is name, then the invoke type the
+// call is expected to have (kStatic, kDirect or kVirtual).
+
+#define INTRINSICS_LIST(V) \
+  V(DoubleDoubleToRawLongBits, kStatic) \
+  V(DoubleLongBitsToDouble, kStatic) \
+  V(FloatFloatToRawIntBits, kStatic) \
+  V(FloatIntBitsToFloat, kStatic) \
+  V(IntegerReverse, kStatic) \
+  V(IntegerReverseBytes, kStatic) \
+  V(LongReverse, kStatic) \
+  V(LongReverseBytes, kStatic) \
+  V(ShortReverseBytes, kStatic) \
+  V(MathAbsDouble, kStatic) \
+  V(MathAbsFloat, kStatic) \
+  V(MathAbsLong, kStatic) \
+  V(MathAbsInt, kStatic) \
+  V(MathMinDoubleDouble, kStatic) \
+  V(MathMinFloatFloat, kStatic) \
+  V(MathMinLongLong, kStatic) \
+  V(MathMinIntInt, kStatic) \
+  V(MathMaxDoubleDouble, kStatic) \
+  V(MathMaxFloatFloat, kStatic) \
+  V(MathMaxLongLong, kStatic) \
+  V(MathMaxIntInt, kStatic) \
+  V(MathSqrt, kStatic) \
+  V(MathCeil, kStatic) \
+  V(MathFloor, kStatic) \
+  V(MathRint, kStatic) \
+  V(MathRoundDouble, kStatic) \
+  V(MathRoundFloat, kStatic) \
+  V(SystemArrayCopyChar, kStatic) \
+  V(ThreadCurrentThread, kStatic) \
+  V(MemoryPeekByte, kStatic) \
+  V(MemoryPeekIntNative, kStatic) \
+  V(MemoryPeekLongNative, kStatic) \
+  V(MemoryPeekShortNative, kStatic) \
+  V(MemoryPokeByte, kStatic) \
+  V(MemoryPokeIntNative, kStatic) \
+  V(MemoryPokeLongNative, kStatic) \
+  V(MemoryPokeShortNative, kStatic) \
+  V(StringCharAt, kDirect) \
+  V(StringCompareTo, kDirect) \
+  V(StringIsEmpty, kDirect) \
+  V(StringIndexOf, kDirect) \
+  V(StringIndexOfAfter, kDirect) \
+  V(StringLength, kDirect) \
+  V(UnsafeCASInt, kDirect) \
+  V(UnsafeCASLong, kDirect) \
+  V(UnsafeCASObject, kDirect) \
+  V(UnsafeGet, kDirect) \
+  V(UnsafeGetVolatile, kDirect) \
+  V(UnsafeGetLong, kDirect) \
+  V(UnsafeGetLongVolatile, kDirect) \
+  V(UnsafePut, kDirect) \
+  V(UnsafePutOrdered, kDirect) \
+  V(UnsafePutVolatile, kDirect) \
+  V(UnsafePutObject, kDirect) \
+  V(UnsafePutObjectOrdered, kDirect) \
+  V(UnsafePutObjectVolatile, kDirect) \
+  V(UnsafePutLong, kDirect) \
+  V(UnsafePutLongOrdered, kDirect) \
+  V(UnsafePutLongVolatile, kDirect) \
+  \
+  V(ReferenceGetReferent, kVirtual)
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+// The guard is dropped again right away so the header can be included once per X-macro use.
+#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
new file mode 100644
index 0000000000..c1f4c94b7f
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -0,0 +1,984 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86_64.h"
+
+#include "code_generator_x86_64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/constants_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+// Third LocationSummary constructor argument used by every location builder in this file;
+// TryDispatch() later reads it back through LocationSummary::Intrinsified().
+static constexpr bool kIntrinsified = true;
+
+// Returns the code generator's assembler viewed as an X86_64Assembler.
+// NOTE(review): down_cast is used for the CodeGenerator conversion in the slow path of this file;
+// it may be the better fit here as well -- confirm against GetAssembler()'s declared return type.
+X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
+  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+}
+
+// Returns the arena of the graph being compiled.
+ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+// Runs Dispatch() on the invoke and reports whether that attached a LocationSummary flagged as
+// intrinsified. Returns false when the invoke has no locations or they are not intrinsified.
+bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  const LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+// Assembler shorthand for code below that has a `codegen` variable in scope.
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+// TODO: trg as memory.
+// Emits a move of the call result into `trg`: from RAX for integral and reference types (32-bit
+// move) and long (64-bit move), from XMM0 for float and double. No code is emitted when `trg`
+// already is the return register. An invalid `trg` is only accepted for a void result.
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorX86_64* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movl(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movq(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected void type for valid location " << trg;
+      UNREACHABLE();
+
+    case Primitive::kPrimDouble: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movsd(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movss(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+  }
+}
+
+// Moves every input of `invoke` from the location recorded in its LocationSummary to the location
+// the dex calling convention assigns to it. Does nothing for an invoke without inputs.
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
+  if (invoke->InputCount() == 0) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+  // We're moving potentially two or more locations to locations that could overlap, so we need
+  // a parallel move resolver.
+  HParallelMove parallel_move(arena);
+
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+    Location actual_loc = locations->InAt(i);
+
+    parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr));
+  }
+
+  // The resolver takes the address of the move list built above.
+  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+
+  // Emits: save live registers, move arguments into calling-convention locations, perform the
+  // regular static/direct call, copy the result to the invoke's output, restore live registers
+  // and jump back to the exit label. Only static-or-direct invokes are supported so far.
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
+    __ Bind(GetEntryLabel());
+
+    codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      // The output must not be a saved live register, or the restore below would overwrite it.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    codegen->RestoreLiveRegisters(invoke_->GetLocations());
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
+};
+
+// From here on `__` expects a local or parameter named `assembler`.
+#undef __
+#define __ assembler->
+
+// Locations for a no-call intrinsic taking one FP register and producing a core register.
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+// Locations for a no-call intrinsic taking one core register and producing an FP register.
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+// Emits a movd from the FP input register to the core output register; `is64bit` is forwarded to
+// the assembler.
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
+}
+
+// Emits a movd from the core input register to the FP output register; `is64bit` is forwarded to
+// the assembler.
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+// The float variants use the same helpers as the double ones with is64bit == false.
+void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+// Locations for a no-call intrinsic with one core register input whose output reuses that
+// register.
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+// Byte-swaps the output register in place. `size` selects the width: kPrimInt and kPrimLong are
+// a single bswap; kPrimShort swaps all 32 bits and then arithmetic-shifts right by 16, leaving
+// the swapped halfword sign-extended. Any other type is fatal.
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type size,
+                            X86_64Assembler* assembler) {
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  switch (size) {
+    case Primitive::kPrimShort:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(out);
+      __ sarl(out, Immediate(16));
+      break;
+    case Primitive::kPrimInt:
+      __ bswapl(out);
+      break;
+    case Primitive::kPrimLong:
+      __ bswapq(out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+//       need is 64b.
+
+// Locations for FP abs: FP register input, output in the same register, plus one core temp
+// (index 0) and one FP temp (index 1) used to materialize the mask.
+static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  // TODO: Enable memory operations when the assembler supports them.
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+}
+
+// Emits FP abs on the output register (which is also the input): the all-ones-but-sign mask is
+// loaded into the core temp, copied to the FP temp and and-ed into the value, clearing the sign
+// bit. Only the in-register case is implemented; any other output location is fatal.
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (output.IsFpuRegister()) {
+    // In-register
+    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+    if (is64bit) {
+      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    } else {
+      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    }
+  } else {
+    // TODO: update when assembler support is available.
+    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+//  Once assembler support is available, in-memory operations look like this:
+//    if (is64bit) {
+//      DCHECK(output.IsDoubleStackSlot());
+//      // There is no 64-bit "and" with a literal operand.
+//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
+//    } else {
+//      DCHECK(output.IsStackSlot());
+//      // Can use and with a literal directly.
+//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
+//    }
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+// Locations for integer abs: core register input, output in the same register, one core temp.
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+// Emits branch-free integer abs on the output register: mask = out >> (width - 1) (arithmetic,
+// so 0 or -1), then out = (out + mask) ^ mask.
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister out = output.AsRegister<CpuRegister>();
+  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (is64bit) {
+    // Create mask.
+    __ movq(mask, out);
+    __ sarq(mask, Immediate(63));
+    // Add mask.
+    __ addq(out, mask);
+    __ xorq(out, mask);
+  } else {
+    // Create mask.
+    __ movl(mask, out);
+    __ sarl(mask, Immediate(31));
+    // Add mask.
+    __ addl(out, mask);
+    __ xorl(out, mask);
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+// Emits FP min (is_min) or max for float or double (is_double). The first input doubles as the
+// output register. An unordered compare (parity flag set) yields a NaN constant loaded through
+// the core temp; equal operands are merged bitwise (or for min, and for max), which is what
+// distinguishes +0.0 from -0.0.
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+                        X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+  Location out_loc = locations->Out();
+  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    DCHECK(out_loc.Equals(op1_loc));
+    return;
+  }
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if Nan jmp Nan_label
+  //  if op2 is min/max jmp op2_label
+  //  if out is min/max jmp done
+  //  handle -0/+0
+  //  jmp done
+  // Nan_label:
+  //  out := NaN
+  //  jmp done
+  // op2_label:
+  //  out := op2
+  // done:
+  //
+  // This removes one jmp, but needs to copy one input (op1) to out.
+  //
+  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+  Label nan, done, op2_label;
+  if (is_double) {
+    __ ucomisd(out, op2);
+  } else {
+    __ ucomiss(out, op2);
+  }
+
+  // Parity is set by ucomis[sd] when the operands are unordered, i.e. at least one is NaN.
+  __ j(Condition::kParityEven, &nan);
+
+  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+  // Handle 0.0/-0.0.
+  if (is_min) {
+    if (is_double) {
+      __ orpd(out, op2);
+    } else {
+      __ orps(out, op2);
+    }
+  } else {
+    if (is_double) {
+      __ andpd(out, op2);
+    } else {
+      __ andps(out, op2);
+    }
+  }
+  __ jmp(&done);
+
+  // NaN handling.
+  __ Bind(&nan);
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
+  if (is_double) {
+    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+  } else {
+    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+  }
+  __ movd(out, cpu_temp, is_double);
+  __ jmp(&done);
+
+  // out := op2;
+  __ Bind(&op2_label);
+  if (is_double) {
+    __ movsd(out, op2);
+  } else {
+    __ movss(out, op2);
+  }
+
+  // Done.
+  __ Bind(&done);
+}
+
+// Locations for FP min/max: two FP register inputs, output in the first input's register, and
+// one core temp used to build the NaN constant.
+static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+  // the second input to be the output (we can simply swap inputs).
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+}
+
+// GenMinMaxFP arguments below are (locations, is_min, is_double, assembler).
+void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+// Emits integer min (is_min) or max for int or long (is_long). The first input doubles as the
+// output register; after a signed compare the second input is conditionally moved over it.
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+                      X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    // Can return immediately, as op1_loc == out_loc.
+    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+    //       a copy here.
+    DCHECK(locations->Out().Equals(op1_loc));
+    return;
+  }
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if out is min jmp done
+  //  out := op2
+  // done:
+  //
+  // The "jmp done / out := op2" pair is emitted as a single conditional move below.
+
+  if (is_long) {
+    __ cmpq(out, op2);
+  } else {
+    __ cmpl(out, op2);
+  }
+
+  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
+}
+
+// Locations for a no-call intrinsic with two core register inputs whose output reuses the first.
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+// GenMinMax arguments below are (locations, is_min, is_long, assembler).
+void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+// Locations for a no-call intrinsic with one FP register input and a separate FP register output.
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void
IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
+  // A single sqrtsd from the input FP register into the output FP register.
+  LocationSummary* locations = invoke->GetLocations();
+  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+  GetAssembler()->sqrtsd(out, in);
+}
+
+// String.charAt locations: string and index in core registers, output in the string's register,
+// one core temp. Marked kCallOnSlowPath because an out-of-range index falls back to a real call.
+void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+// Emits an inline String.charAt: an unsigned compare of the index against the string's count
+// sends out-of-range (including negative) indices to the intrinsic slow path; otherwise the
+// 16-bit char at (offset + idx) in the backing array is zero-extended into the output.
+void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  // Starting offset within data array
+  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+  // Start of char data with array_
+  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we
+  //       move to (coalesced) implicit checks, we have to do a null check below.
+  DCHECK(!kCoalescedImplicitNullCheck);
+
+  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+  //       the cost.
+  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+  //       we will not optimize the code for constants (which would save a register).
+
+  SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  X86_64Assembler* assembler = GetAssembler();
+
+  // Inputs are still untouched here, as the slow path requires.
+  __ cmpl(idx, Address(obj, count_offset));
+  __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+  // Get the actual element.
+  __ movl(temp, idx);                          // temp := idx.
+  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
+  __ movl(out, Address(obj, value_offset));    // out := obj.array (out shares obj's register).
+  // out = out[2*temp].
+  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// Emits a load of `size` (byte, short, int or long) from the raw address in the first input into
+// the output register; byte and short loads are sign-extending. Other types are fatal.
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movsxb(out, Address(address, 0));
+      break;
+    case Primitive::kPrimShort:
+      __ movsxw(out, Address(address, 0));
+      break;
+    case Primitive::kPrimInt:
+      __ movl(out, Address(address, 0));
+      break;
+    case Primitive::kPrimLong:
+      __ movq(out, Address(address, 0));
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for peek: " << size;
+      UNREACHABLE();
+  }
+}
+
+// All peek variants share the one-register-in, same-register-out locations and differ only in
+// the width passed to GenPeek.
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+// Locations for a no-call intrinsic with two core register inputs and no output.
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// Emits a store of `size` (byte, short, int or long) of the second input to the raw address held
+// in the first input. Other types are fatal.
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movb(Address(address, 0), value);
+      break;
+    case Primitive::kPrimShort:
+      __ movw(Address(address, 0), value);
+      break;
+    case Primitive::kPrimInt:
+      __ movl(Address(address, 0), value);
+      break;
+    case Primitive::kPrimLong:
+      __ movq(Address(address, 0), value);
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for poke: " << size;
+      UNREACHABLE();
+  }
+}
+
+// All poke variants share the two-register-in, no-output locations and differ only in the width
+// passed to GenPoke.
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+// Thread.currentThread takes no inputs; it only needs a core register for the result.
+void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+
LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { + CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); + GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); +} + +static void GenUnsafeGet(LocationSummary* locations, bool is_long, + bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); + + if (is_long) { + __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } else { + // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object + // pointer will entail an unpack operation. + __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void 
IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler()); +} + +static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, + Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} + +// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 +// memory model. 
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } else { + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } + + if (is_volatile) { + __ mfence(); + } + + if (type == Primitive::kPrimNot) { + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void 
IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(MathFloor) +UNIMPLEMENTED_INTRINSIC(MathCeil) +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h new file mode 100644 index 0000000000..c1fa99c2dc --- /dev/null +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace x86_64 { + +class CodeGeneratorX86_64; +class X86_64Assembler; + +class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); +}; + +class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {} + + // Define visitor methods. 
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + X86_64Assembler* GetAssembler(); + + ArenaAllocator* GetArena(); + + CodeGeneratorX86_64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64); +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ed5e260a5b..9f2f9ece85 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -20,7 +20,9 @@ namespace art { -LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) +LocationSummary::LocationSummary(HInstruction* instruction, + CallKind call_kind, + bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), @@ -29,7 +31,8 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), - live_registers_() { + live_registers_(), + intrinsified_(intrinsified) { inputs_.SetSize(instruction->InputCount()); for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 7df99d4b6f..d41b3aecfd 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -463,7 +463,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { kCall }; - LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); + LocationSummary(HInstruction* instruction, + CallKind call_kind = kNoCall, + bool intrinsified = false); void SetInAt(uint32_t at, Location 
location) { DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid()); @@ -574,6 +576,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return output_overlaps_; } + bool Intrinsified() const { + return intrinsified_; + } + private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; @@ -593,6 +599,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { // Registers that are in use at this position. RegisterSet live_registers_; + // Whether these are locations for an intrinsified call. + const bool intrinsified_; + ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); DISALLOW_COPY_AND_ASSIGN(LocationSummary); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b98bc70a9f..f2aa043849 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1580,19 +1580,18 @@ class HLongConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HLongConstant); }; +enum class Intrinsics { +#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name, +#include "intrinsics_list.h" + kNone, + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS +}; +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); + class HInvoke : public HInstruction { public: - HInvoke(ArenaAllocator* arena, - uint32_t number_of_arguments, - Primitive::Type return_type, - uint32_t dex_pc) - : HInstruction(SideEffects::All()), - inputs_(arena, number_of_arguments), - return_type_(return_type), - dex_pc_(dex_pc) { - inputs_.SetSize(number_of_arguments); - } - virtual size_t InputCount() const { return inputs_.Size(); } virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } @@ -1612,12 +1611,38 @@ class HInvoke : public HInstruction { uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexMethodIndex() const { return dex_method_index_; } + + Intrinsics GetIntrinsic() { 
+ return intrinsic_; + } + + void SetIntrinsic(Intrinsics intrinsic) { + intrinsic_ = intrinsic; + } + DECLARE_INSTRUCTION(Invoke); protected: + HInvoke(ArenaAllocator* arena, + uint32_t number_of_arguments, + Primitive::Type return_type, + uint32_t dex_pc, + uint32_t dex_method_index) + : HInstruction(SideEffects::All()), + inputs_(arena, number_of_arguments), + return_type_(return_type), + dex_pc_(dex_pc), + dex_method_index_(dex_method_index), + intrinsic_(Intrinsics::kNone) { + inputs_.SetSize(number_of_arguments); + } + GrowableArray<HInstruction*> inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; + const uint32_t dex_method_index_; + Intrinsics intrinsic_; private: DISALLOW_COPY_AND_ASSIGN(HInvoke); @@ -1629,19 +1654,16 @@ class HInvokeStaticOrDirect : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, - uint32_t index_in_dex_cache, + uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - index_in_dex_cache_(index_in_dex_cache), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), invoke_type_(invoke_type) {} - uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; } InvokeType GetInvokeType() const { return invoke_type_; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); private: - const uint32_t index_in_dex_cache_; const InvokeType invoke_type_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); @@ -1653,8 +1675,9 @@ class HInvokeVirtual : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, + uint32_t dex_method_index, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} uint32_t GetVTableIndex() const { return vtable_index_; } @@ -1675,8 +1698,7 @@ class HInvokeInterface : public HInvoke { uint32_t dex_pc, uint32_t 
dex_method_index, uint32_t imt_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - dex_method_index_(dex_method_index), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} uint32_t GetImtIndex() const { return imt_index_; } @@ -1685,7 +1707,6 @@ class HInvokeInterface : public HInvoke { DECLARE_INSTRUCTION(InvokeInterface); private: - const uint32_t dex_method_index_; const uint32_t imt_index_; DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); @@ -2775,10 +2796,16 @@ class HParallelMove : public HTemplateInstruction<0> { : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {} void AddMove(MoveOperands* move) { - if (kIsDebugBuild && move->GetInstruction() != nullptr) { + if (kIsDebugBuild) { + if (move->GetInstruction() != nullptr) { + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction()) + << "Doing parallel moves for the same instruction."; + } + } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction()) - << "Doing parallel moves for the same instruction."; + DCHECK(!move->GetDestination().Contains(moves_.Get(i)->GetDestination())) + << "Same destination for two moves in a parallel move."; } } moves_.Add(move); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 692d452f54..605637300f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -25,6 +25,7 @@ #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" @@ -32,6 +33,7 @@ #include "gvn.h" #include "inliner.h" #include "instruction_simplifier.h" +#include "intrinsics.h" #include 
"jni/quick/jni_compiler.h" #include "mirror/art_method-inl.h" #include "nodes.h" @@ -149,11 +151,12 @@ void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); - if (driver->GetDumpPasses()) { + const std::string cfg_file_name = driver->GetDumpCfgFileName(); + if (!cfg_file_name.empty()) { CHECK_EQ(driver->GetThreadCount(), 1U) << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; - visualizer_output_.reset(new std::ofstream("art.cfg")); + visualizer_output_.reset(new std::ofstream(cfg_file_name)); } } @@ -214,9 +217,12 @@ static void RunOptimizations(HGraph* graph, BoundsCheckElimination bce(graph); InstructionSimplifier simplify2(graph); + IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); + HOptimization* optimizations[] = { &redundant_phi, &dead_phi, + &intrinsics, &dce, &fold, &simplify1, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 04b56345c4..b3eb1e2d51 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -45,8 +45,12 @@ namespace art { LiveInterval* BuildInterval(const size_t ranges[][2], size_t number_of_ranges, ArenaAllocator* allocator, - int reg = -1) { - LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt); + int reg = -1, + HInstruction* defined_by = nullptr) { + LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by); + if (defined_by != nullptr) { + defined_by->SetLiveInterval(interval); + } for (size_t i = number_of_ranges; i > 0; --i) { interval->AddRange(ranges[i - 1][0], ranges[i - 1][1]); } diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 
210f7d7f09..7ab41b6ddc 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -120,16 +120,9 @@ TEST(ParallelMoveTest, Swap) { { TestParallelMoveResolver resolver(&allocator); - static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}}; + static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}}; resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str()); - } - - { - TestParallelMoveResolver resolver(&allocator); - static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}}; - resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves))); - ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str()); } } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1efc52b9ec..93ed44ee44 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -56,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, blocked_core_registers_(codegen->GetBlockedCoreRegisters()), blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()), reserved_out_slots_(0), - maximum_number_of_live_registers_(0) { + maximum_number_of_live_core_registers_(0), + maximum_number_of_live_fp_registers_(0) { codegen->SetupBlockedRegisters(); physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); @@ -185,9 +186,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } LinearScan(); - size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_; - maximum_number_of_live_registers_ = 0; - 
inactive_.Reset(); active_.Reset(); handled_.Reset(); @@ -207,7 +205,6 @@ void RegisterAllocator::AllocateRegistersInternal() { } } LinearScan(); - maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers; } void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { @@ -602,8 +599,13 @@ void RegisterAllocator::LinearScan() { if (current->IsSlowPathSafepoint()) { // Synthesized interval to record the maximum number of live registers // at safepoints. No need to allocate a register for it. - maximum_number_of_live_registers_ = - std::max(maximum_number_of_live_registers_, active_.Size()); + if (processing_core_registers_) { + maximum_number_of_live_core_registers_ = + std::max(maximum_number_of_live_core_registers_, active_.Size()); + } else { + maximum_number_of_live_fp_registers_ = + std::max(maximum_number_of_live_fp_registers_, active_.Size()); + } DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); continue; } @@ -855,7 +857,9 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); active_.DeleteAt(i); - handled_.Add(active); + if (split != active) { + handled_.Add(active); + } AddSorted(unhandled_, split); break; } @@ -876,9 +880,14 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (next_intersection != kNoLifetime) { if (inactive->IsFixed()) { LiveInterval* split = Split(current, next_intersection); + DCHECK_NE(split, current); AddSorted(unhandled_, split); } else { - LiveInterval* split = Split(inactive, next_intersection); + // Split at the start of `current`, which will lead to splitting + // at the end of the lifetime hole of `inactive`. + LiveInterval* split = Split(inactive, current->GetStart()); + // If it's inactive, it must start before the current interval. 
+ DCHECK_NE(split, inactive); inactive_.DeleteAt(i); --i; --e; @@ -1215,10 +1224,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { Location expected_location = locations->InAt(use->GetInputIndex()); - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } } use = use->GetNext(); @@ -1255,8 +1271,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { switch (source.GetKind()) { case Location::kRegister: { locations->AddLiveRegister(source); - DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_); - + DCHECK_LE(locations->GetNumberOfLiveRegisters(), + maximum_number_of_live_core_registers_ + + maximum_number_of_live_fp_registers_); if (current->GetType() == Primitive::kPrimNot) { locations->SetRegisterBit(source.reg()); } @@ -1349,7 +1366,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, void RegisterAllocator::Resolve() { codegen_->ComputeFrameSize( - spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_); + spill_slots_.Size(), maximum_number_of_live_core_registers_, + maximum_number_of_live_fp_registers_, reserved_out_slots_); // Adjust the Out Location of instructions. 
// TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index c152a8bf67..ec46a776b5 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -190,10 +190,14 @@ class RegisterAllocator { // Slots reserved for out arguments. size_t reserved_out_slots_; - // The maximum live registers at safepoints. - size_t maximum_number_of_live_registers_; + // The maximum live core registers at safepoints. + size_t maximum_number_of_live_core_registers_; + + // The maximum live FP registers at safepoints. + size_t maximum_number_of_live_fp_registers_; ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil); + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); DISALLOW_COPY_AND_ASSIGN(RegisterAllocator); }; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index c2ea80ec33..0948643355 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -738,4 +738,106 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { } } +// Test a bug in the register allocator, where allocating a blocked +// register would lead to spilling an inactive interval at the wrong +// position. +TEST(RegisterAllocatorTest, SpillInactive) { + ArenaPool pool; + + // Create a synthesized graph to please the register_allocator and + // ssa_liveness_analysis code. 
+ ArenaAllocator allocator(&pool); + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt); + entry->AddInstruction(one); + entry->AddInstruction(two); + entry->AddInstruction(three); + entry->AddInstruction(four); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction(new (&allocator) HExit()); + + // We create a synthesized user requesting a register, to avoid just spilling the + // intervals. + HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt); + user->AddInput(one); + user->SetBlock(block); + LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + static constexpr size_t phi_ranges[][2] = {{20, 30}}; + BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user); + + // Create an interval with lifetime holes. 
+ static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; + LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_); + + locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + first = first->SplitAt(1); + + // Create an interval that conflicts with the next interval, to force the next + // interval to call `AllocateBlockedReg`. + static constexpr size_t ranges2[][2] = {{2, 4}}; + LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two); + locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + + // Create an interval that will lead to splitting the first interval. The bug occurred + // by splitting at a wrong position, in this case at the next intersection between + // this interval and the first interval. We would have then put the interval with ranges + // "[0, 2(, [4, 6(" in the list of handled intervals, even though we haven't processed intervals + // before lifetime position 6 yet. 
+ static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; + LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_); + locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + third = third->SplitAt(3); + + // Because the first part of the split interval was considered handled, this interval + // was free to allocate the same register, even though it conflicts with it. + static constexpr size_t ranges4[][2] = {{4, 6}}; + LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four); + locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); + + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + + RegisterAllocator register_allocator(&allocator, &codegen, liveness); + register_allocator.unhandled_core_intervals_.Add(fourth); + register_allocator.unhandled_core_intervals_.Add(third); + register_allocator.unhandled_core_intervals_.Add(second); + register_allocator.unhandled_core_intervals_.Add(first); + + // Set just one register available to make all intervals compete for the same. + register_allocator.number_of_registers_ = 1; + register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); + register_allocator.processing_core_registers_ = true; + register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_; + register_allocator.LinearScan(); + + // Test that there are no conflicts between intervals. 
+ GrowableArray<LiveInterval*> intervals(&allocator, 0); + intervals.Add(first); + intervals.Add(second); + intervals.Add(third); + intervals.Add(fourth); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); +} + } // namespace art diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 74611e1cbb..b632c4d05a 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -429,7 +429,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveRange* current = first_range_; LiveRange* previous = nullptr; // Iterate over the ranges, and either find a range that covers this position, or - // a two ranges in between this position (that is, the position is in a lifetime hole). + // two ranges in between this position (that is, the position is in a lifetime hole). do { if (position >= current->GetEnd()) { // Move to next range. @@ -653,6 +653,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { static constexpr int kNoRegister = -1; static constexpr int kNoSpillSlot = -1; + ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); + DISALLOW_COPY_AND_ASSIGN(LiveInterval); }; diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 385d1340fc..cb51ed8fc8 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -20,6 +20,7 @@ #include "utils/arm/assembler_arm.h" #include "utils/arm64/assembler_arm64.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/x86/assembler_x86.h" #include "utils/x86_64/assembler_x86_64.h" @@ -120,6 +121,35 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention } } // namespace mips +namespace mips64 { +static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, + ThreadOffset<8> 
offset) { + std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64))); + + switch (abi) { + case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. + __ LoadFromOffset(kLoadDoubleword, T9, A0, offset.Int32Value()); + break; + case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (A0). + __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset().Int32Value()); + __ LoadFromOffset(kLoadDoubleword, T9, T9, offset.Int32Value()); + break; + case kQuickAbi: // Fall-through. + __ LoadFromOffset(kLoadDoubleword, T9, S1, offset.Int32Value()); + } + __ Jr(T9); + __ Nop(); + __ Break(); + + size_t cs = assembler->CodeSize(); + std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); + MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); + assembler->FinalizeInstructions(code); + + return entry_stub.release(); +} +} // namespace mips64 + namespace x86 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86))); @@ -160,6 +190,8 @@ const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCal switch (isa) { case kArm64: return arm64::CreateTrampoline(abi, offset); + case kMips64: + return mips64::CreateTrampoline(abi, offset); case kX86_64: return x86_64::CreateTrampoline(offset); default: diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 68345129c3..5340dd3a25 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -23,6 +23,7 @@ #include "arm/assembler_thumb2.h" #include "arm64/assembler_arm64.h" #include "mips/assembler_mips.h" +#include "mips64/assembler_mips64.h" #include "x86/assembler_x86.h" #include "x86_64/assembler_x86_64.h" #include "globals.h" @@ -115,6 +116,8 @@ Assembler* Assembler::Create(InstructionSet instruction_set) { return new arm64::Arm64Assembler(); case kMips: 
return new mips::MipsAssembler(); + case kMips64: + return new mips64::Mips64Assembler(); case kX86: return new x86::X86Assembler(); case kX86_64: diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 134dda4b2c..923ecdbd9d 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -47,6 +47,9 @@ namespace arm64 { namespace mips { class MipsAssembler; } +namespace mips64 { + class Mips64Assembler; +} namespace x86 { class X86Assembler; } @@ -120,6 +123,7 @@ class Label { friend class arm::Thumb2Assembler; friend class arm64::Arm64Assembler; friend class mips::MipsAssembler; + friend class mips64::Mips64Assembler; friend class x86::X86Assembler; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index bfb2829a32..bb62bca3b9 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -30,6 +30,9 @@ class Arm64ManagedRegister; namespace mips { class MipsManagedRegister; } +namespace mips64 { +class Mips64ManagedRegister; +} namespace x86 { class X86ManagedRegister; @@ -54,6 +57,7 @@ class ManagedRegister { arm::ArmManagedRegister AsArm() const; arm64::Arm64ManagedRegister AsArm64() const; mips::MipsManagedRegister AsMips() const; + mips64::Mips64ManagedRegister AsMips64() const; x86::X86ManagedRegister AsX86() const; x86_64::X86_64ManagedRegister AsX86_64() const; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc new file mode 100644 index 0000000000..233ae7db3c --- /dev/null +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -0,0 +1,1036 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "assembler_mips64.h" + +#include "base/casts.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "memory_region.h" +#include "thread.h" + +namespace art { +namespace mips64 { + +void Mips64Assembler::Emit(int32_t value) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<int32_t>(value); +} + +void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, + int shamt, int funct) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(rt, kNoGpuRegister); + CHECK_NE(rd, kNoGpuRegister); + int32_t encoding = opcode << kOpcodeShift | + static_cast<int32_t>(rs) << kRsShift | + static_cast<int32_t>(rt) << kRtShift | + static_cast<int32_t>(rd) << kRdShift | + shamt << kShamtShift | + funct; + Emit(encoding); +} + +void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(rt, kNoGpuRegister); + int32_t encoding = opcode << kOpcodeShift | + static_cast<int32_t>(rs) << kRsShift | + static_cast<int32_t>(rt) << kRtShift | + imm; + Emit(encoding); +} + +void Mips64Assembler::EmitJ(int opcode, int address) { + int32_t encoding = opcode << kOpcodeShift | + address; + Emit(encoding); +} + +void Mips64Assembler::EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, +int funct) { + CHECK_NE(ft, kNoFpuRegister); + CHECK_NE(fs, kNoFpuRegister); + CHECK_NE(fd, kNoFpuRegister); + int32_t encoding = opcode << kOpcodeShift | + fmt << kFmtShift | + static_cast<int32_t>(ft) << kFtShift | + static_cast<int32_t>(fs) << 
kFsShift | + static_cast<int32_t>(fd) << kFdShift | + funct; + Emit(encoding); +} + +void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm) { + CHECK_NE(rt, kNoFpuRegister); + int32_t encoding = opcode << kOpcodeShift | + fmt << kFmtShift | + static_cast<int32_t>(rt) << kRtShift | + imm; + Emit(encoding); +} + +void Mips64Assembler::EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal) { + int offset; + if (label->IsBound()) { + offset = label->Position() - buffer_.Size(); + } else { + // Use the offset field of the branch instruction for linking the sites. + offset = label->position_; + label->LinkTo(buffer_.Size()); + } + if (equal) { + Beq(rt, rs, (offset >> 2) & kBranchOffsetMask); + } else { + Bne(rt, rs, (offset >> 2) & kBranchOffsetMask); + } +} + +void Mips64Assembler::EmitJump(Label* label, bool link) { + int offset; + if (label->IsBound()) { + offset = label->Position() - buffer_.Size(); + } else { + // Use the offset field of the jump instruction for linking the sites. + offset = label->position_; + label->LinkTo(buffer_.Size()); + } + if (link) { + Jal((offset >> 2) & kJumpOffsetMask); + } else { + J((offset >> 2) & kJumpOffsetMask); + } +} + +int32_t Mips64Assembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) { + CHECK_ALIGNED(offset, 4); + CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset; + + // Properly preserve only the bits supported in the instruction. + offset >>= 2; + if (is_jump) { + offset &= kJumpOffsetMask; + return (inst & ~kJumpOffsetMask) | offset; + } else { + offset &= kBranchOffsetMask; + return (inst & ~kBranchOffsetMask) | offset; + } +} + +int Mips64Assembler::DecodeBranchOffset(int32_t inst, bool is_jump) { + // Sign-extend, then left-shift by 2. 
+ if (is_jump) { + return (((inst & kJumpOffsetMask) << 6) >> 4); + } else { + return (((inst & kBranchOffsetMask) << 16) >> 14); + } +} + +void Mips64Assembler::Bind(Label* label, bool is_jump) { + CHECK(!label->IsBound()); + int bound_pc = buffer_.Size(); + while (label->IsLinked()) { + int32_t position = label->Position(); + int32_t next = buffer_.Load<int32_t>(position); + int32_t offset = is_jump ? bound_pc - position : bound_pc - position - 4; + int32_t encoded = Mips64Assembler::EncodeBranchOffset(offset, next, is_jump); + buffer_.Store<int32_t>(position, encoded); + label->position_ = Mips64Assembler::DecodeBranchOffset(next, is_jump); + } + label->BindTo(bound_pc); +} + +void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x20); +} + +void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x8, rs, rt, imm16); +} + +void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x21); +} + +void Mips64Assembler::Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x9, rs, rt, imm16); +} + +void Mips64Assembler::Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x19, rs, rt, imm16); +} + +void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x22); +} + +void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x23); +} + +void Mips64Assembler::Mult(GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18); +} + +void Mips64Assembler::Multu(GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19); +} + +void Mips64Assembler::Div(GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a); +} + +void Mips64Assembler::Divu(GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b); +} + +void 
Mips64Assembler::And(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x24); +} + +void Mips64Assembler::Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xc, rs, rt, imm16); +} + +void Mips64Assembler::Or(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x25); +} + +void Mips64Assembler::Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xd, rs, rt, imm16); +} + +void Mips64Assembler::Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x26); +} + +void Mips64Assembler::Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xe, rs, rt, imm16); +} + +void Mips64Assembler::Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x27); +} + +void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rs, int shamt) { + // SLL rd, rt, sa: the source operand is encoded in the rt field and the rs field must be 0. + EmitR(0, static_cast<GpuRegister>(0), rs, rd, shamt, 0x00); +} + +void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rs, int shamt) { + // SRL rd, rt, sa: source goes in the rt field; a non-zero rs field would select ROTR. + EmitR(0, static_cast<GpuRegister>(0), rs, rd, shamt, 0x02); +} + +void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rs, int shamt) { + // SRA rd, rt, sa: source goes in the rt field and the rs field must be 0. + EmitR(0, static_cast<GpuRegister>(0), rs, rd, shamt, 0x03); +} + +void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x04); +} + +void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x06); +} + +void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x07); +} + +void Mips64Assembler::Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x20, rs, rt, imm16); +} + +void Mips64Assembler::Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x21, rs, rt, imm16); +} + +void Mips64Assembler::Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x23, rs, rt, imm16); +} + +void Mips64Assembler::Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x37, rs, rt, imm16); +} + 
+void Mips64Assembler::Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x24, rs, rt, imm16); +} + +void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x25, rs, rt, imm16); +} + +void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { + EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); +} + +void Mips64Assembler::Mfhi(GpuRegister rd) { + EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10); +} + +void Mips64Assembler::Mflo(GpuRegister rd) { + EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12); +} + +void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x28, rs, rt, imm16); +} + +void Mips64Assembler::Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x29, rs, rt, imm16); +} + +void Mips64Assembler::Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x2b, rs, rt, imm16); +} + +void Mips64Assembler::Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x3f, rs, rt, imm16); +} + +void Mips64Assembler::Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x2a); +} + +void Mips64Assembler::Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + EmitR(0, rs, rt, rd, 0, 0x2b); +} + +void Mips64Assembler::Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xa, rs, rt, imm16); +} + +void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xb, rs, rt, imm16); +} + +void Mips64Assembler::Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x4, rs, rt, imm16); + Nop(); +} + +void Mips64Assembler::Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x5, rs, rt, imm16); + Nop(); +} + +void Mips64Assembler::J(uint32_t address) { + EmitJ(0x2, address); + Nop(); +} + +void Mips64Assembler::Jal(uint32_t address) { + // JAL is opcode 0x3; 0x2 encodes a plain J, which does not write the return address. + EmitJ(0x3, address); + Nop(); +} + +void Mips64Assembler::Jr(GpuRegister rs) { + EmitR(0, rs,
static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), 0, 0x09); // Jalr zero, rs + Nop(); +} + +void Mips64Assembler::Jalr(GpuRegister rs) { + EmitR(0, rs, static_cast<GpuRegister>(0), RA, 0, 0x09); + Nop(); +} + +void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x0); +} + +void Mips64Assembler::SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x1); +} + +void Mips64Assembler::MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x2); +} + +void Mips64Assembler::DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x10, ft, fs, fd, 0x3); +} + +void Mips64Assembler::AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), + static_cast<FpuRegister>(fd), 0x0); +} + +void Mips64Assembler::SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), + static_cast<FpuRegister>(fd), 0x1); +} + +void Mips64Assembler::MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), + static_cast<FpuRegister>(fd), 0x2); +} + +void Mips64Assembler::DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs), + static_cast<FpuRegister>(fd), 0x3); +} + +void Mips64Assembler::MovS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x6); +} + +void Mips64Assembler::MovD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), static_cast<FpuRegister>(fs), + static_cast<FpuRegister>(fd), 0x6); +} + +void Mips64Assembler::Mfc1(GpuRegister rt, FpuRegister fs) { + EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0); +} + +void 
Mips64Assembler::Mtc1(FpuRegister ft, GpuRegister rs) { + EmitFR(0x11, 0x04, ft, static_cast<FpuRegister>(rs), static_cast<FpuRegister>(0), 0x0); +} + +void Mips64Assembler::Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) { + EmitI(0x31, rs, static_cast<GpuRegister>(ft), imm16); +} + +void Mips64Assembler::Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) { + EmitI(0x35, rs, static_cast<GpuRegister>(ft), imm16); +} + +void Mips64Assembler::Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) { + EmitI(0x39, rs, static_cast<GpuRegister>(ft), imm16); +} + +void Mips64Assembler::Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) { + EmitI(0x3d, rs, static_cast<GpuRegister>(ft), imm16); +} + +void Mips64Assembler::Break() { + EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), + static_cast<GpuRegister>(0), 0, 0xD); +} + +void Mips64Assembler::Nop() { + EmitR(0x0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), + static_cast<GpuRegister>(0), 0, 0x0); +} + +void Mips64Assembler::Move(GpuRegister rt, GpuRegister rs) { + EmitI(0x19, rs, rt, 0); // Daddiu +} + +void Mips64Assembler::Clear(GpuRegister rt) { + EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rt, 0, 0x20); +} + +void Mips64Assembler::Not(GpuRegister rt, GpuRegister rs) { + EmitR(0, static_cast<GpuRegister>(0), rs, rt, 0, 0x27); +} + +void Mips64Assembler::Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + Mult(rs, rt); + Mflo(rd); +} + +void Mips64Assembler::Div(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + Div(rs, rt); + Mflo(rd); +} + +void Mips64Assembler::Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt) { + Div(rs, rt); + Mfhi(rd); +} + +void Mips64Assembler::AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value) { + CHECK((value >= -32768) && (value <= 32766)); + Daddiu(rt, rs, value); +} + +void Mips64Assembler::LoadImmediate64(GpuRegister rt, int32_t value) { + CHECK((value >= -32768) && (value <= 32766)); + Daddiu(rt, ZERO, 
value); +} + +void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, + int32_t offset) { + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + Lw(reg, base, offset); + break; + case kLoadDoubleword: + // TODO: alignment issues ??? + Ld(reg, base, offset); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, + int32_t offset) { + CHECK((offset >= -32768) && (offset <= 32766)); + switch (type) { + case kLoadWord: + Lwc1(reg, base, offset); + break; + case kLoadDoubleword: + // TODO: alignment issues ??? + Ldc1(reg, base, offset); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, + size_t size) { + Mips64ManagedRegister dst = m_dst.AsMips64(); + if (dst.IsNoRegister()) { + CHECK_EQ(0u, size) << dst; + } else if (dst.IsGpuRegister()) { + if (size == 4) { + CHECK_EQ(4u, size) << dst; + LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset); + } else if (size == 8) { + CHECK_EQ(8u, size) << dst; + LoadFromOffset(kLoadDoubleword, dst.AsGpuRegister(), src_register, src_offset); + } else { + UNIMPLEMENTED(FATAL) << "We only support Load() of size 4 and 8"; + } + } else if (dst.IsFpuRegister()) { + if (size == 4) { + CHECK_EQ(4u, size) << dst; + LoadFpuFromOffset(kLoadWord, dst.AsFpuRegister(), src_register, src_offset); + } else if (size == 8) { + CHECK_EQ(8u, size) << dst; + LoadFpuFromOffset(kLoadDoubleword, dst.AsFpuRegister(), src_register, src_offset); + } else { + UNIMPLEMENTED(FATAL) << "We only support Load() of size 4 and 8"; + } + } +} + +void 
Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, + int32_t offset) { + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + // TODO: alignment issues ??? + Sd(reg, base, offset); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, + int32_t offset) { + switch (type) { + case kStoreWord: + Swc1(reg, base, offset); + break; + case kStoreDoubleword: + Sdc1(reg, base, offset); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +constexpr size_t kFramePointerSize = 8; + +void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, + const std::vector<ManagedRegister>& callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + CHECK_ALIGNED(frame_size, kStackAlignment); + + // Increase frame to required size. + IncreaseFrameSize(frame_size); + + // Push callee saves and return address + int stack_offset = frame_size - kFramePointerSize; + StoreToOffset(kStoreDoubleword, RA, SP, stack_offset); + for (int i = callee_save_regs.size() - 1; i >= 0; --i) { + stack_offset -= kFramePointerSize; + GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); + StoreToOffset(kStoreDoubleword, reg, SP, stack_offset); + } + + // Write out Method*. + StoreToOffset(kStoreWord, method_reg.AsMips64().AsGpuRegister(), SP, 0); + + // Write out entry spills. + int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>); + for (size_t i = 0; i < entry_spills.size(); ++i) { + Mips64ManagedRegister reg = entry_spills.at(i).AsMips64(); + ManagedRegisterSpill spill = entry_spills.at(i); + int32_t size = spill.getSize(); + if (reg.IsNoRegister()) { + // only increment stack offset. 
+ offset += size; + } else if (reg.IsFpuRegister()) { + StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsFpuRegister(), SP, offset); + offset += size; + } else if (reg.IsGpuRegister()) { + StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsGpuRegister(), SP, offset); + offset += size; + } + } +} + +void Mips64Assembler::RemoveFrame(size_t frame_size, + const std::vector<ManagedRegister>& callee_save_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + + // Pop callee saves and return address + int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; + for (size_t i = 0; i < callee_save_regs.size(); ++i) { + GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); + LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset); + stack_offset += kFramePointerSize; + } + LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset); + + // Decrease frame to required size. + DecreaseFrameSize(frame_size); + + // Then jump to the return address. 
+ Jr(RA); +} + +void Mips64Assembler::IncreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant64(SP, SP, -adjust); +} + +void Mips64Assembler::DecreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant64(SP, SP, adjust); +} + +void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { + Mips64ManagedRegister src = msrc.AsMips64(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsGpuRegister()) { + CHECK(size == 4 || size == 8) << size; + if (size == 8) { + StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value()); + } else if (size == 4) { + StoreToOffset(kStoreWord, src.AsGpuRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Store() of size 4 and 8"; + } + } else if (src.IsFpuRegister()) { + CHECK(size == 4 || size == 8) << size; + if (size == 8) { + StoreFpuToOffset(kStoreDoubleword, src.AsFpuRegister(), SP, dest.Int32Value()); + } else if (size == 4) { + StoreFpuToOffset(kStoreWord, src.AsFpuRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Store() of size 4 and 8"; + } + } +} + +void Mips64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + Mips64ManagedRegister src = msrc.AsMips64(); + CHECK(src.IsGpuRegister()); + StoreToOffset(kStoreWord, src.AsGpuRegister(), SP, dest.Int32Value()); +} + +void Mips64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + Mips64ManagedRegister src = msrc.AsMips64(); + CHECK(src.IsGpuRegister()); + StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value()); +} + +void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, + ManagedRegister mscratch) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + LoadImmediate64(scratch.AsGpuRegister(), imm); + StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, 
dest.Int32Value()); +} + +void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, + ManagedRegister mscratch) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + LoadImmediate64(scratch.AsGpuRegister(), imm); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); +} + +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + AddConstant64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); +} + +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { + StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); +} + +void Mips64Assembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc, + FrameOffset in_off, ManagedRegister mscratch) { + Mips64ManagedRegister src = msrc.AsMips64(); + Mips64ManagedRegister scratch = mscratch.AsMips64(); + StoreToOffset(kStoreDoubleword, src.AsGpuRegister(), SP, dest.Int32Value()); + LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), SP, in_off.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, dest.Int32Value() + 8); +} + +void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { + return EmitLoad(mdest, SP, src.Int32Value(), size); +} + +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { + return EmitLoad(mdest, S1, src.Int32Value(), size); +} + +void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { + Mips64ManagedRegister dest = mdest.AsMips64(); + CHECK(dest.IsGpuRegister()); + LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value()); +} + +void Mips64Assembler::LoadRef(ManagedRegister mdest, 
ManagedRegister base, + MemberOffset offs) { + Mips64ManagedRegister dest = mdest.AsMips64(); + // Validate both operands: the destination and the base must be general-purpose registers. + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); + LoadFromOffset(kLoadWord, dest.AsGpuRegister(), + base.AsMips64().AsGpuRegister(), offs.Int32Value()); + if (kPoisonHeapReferences) { + Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); + } +} + +void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, + Offset offs) { + Mips64ManagedRegister dest = mdest.AsMips64(); + // Validate both operands: the destination and the base must be general-purpose registers. + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()) << dest; + LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), + base.AsMips64().AsGpuRegister(), offs.Int32Value()); +} + +void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<8> offs) { + Mips64ManagedRegister dest = mdest.AsMips64(); + CHECK(dest.IsGpuRegister()); + LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); +} + +void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +} + +void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +} + +void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { + Mips64ManagedRegister dest = mdest.AsMips64(); + Mips64ManagedRegister src = msrc.AsMips64(); + if (!dest.Equals(src)) { + if (dest.IsGpuRegister()) { + CHECK(src.IsGpuRegister()) << src; + Move(dest.AsGpuRegister(), src.AsGpuRegister()); + } else if (dest.IsFpuRegister()) { + CHECK(src.IsFpuRegister()) << src; + if (size == 4) { + MovS(dest.AsFpuRegister(), src.AsFpuRegister()); + } else if (size == 8) { + MovD(dest.AsFpuRegister(), src.AsFpuRegister()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } + } + } +} + +void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, + ManagedRegister mscratch) { + 
Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, src.Int32Value()); + StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); +} + +void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, + ThreadOffset<8> thr_offs, + ManagedRegister mscratch) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); +} + +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), + SP, fr_offs.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), + S1, thr_offs.Int32Value()); +} + +void Mips64Assembler::Copy(FrameOffset dest, FrameOffset src, + ManagedRegister mscratch, size_t size) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, src.Int32Value()); + StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), SP, src.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Mips64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, + ManagedRegister mscratch, size_t size) { + GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); + CHECK(size == 4 || size == 8) << size; + 
if (size == 4) { + LoadFromOffset(kLoadWord, scratch, src_base.AsMips64().AsGpuRegister(), + src_offset.Int32Value()); + StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(kLoadDoubleword, scratch, src_base.AsMips64().AsGpuRegister(), + src_offset.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch, SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, + ManagedRegister mscratch, size_t size) { + GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value()); + StoreToOffset(kStoreWord, scratch, dest_base.AsMips64().AsGpuRegister(), + dest_offset.Int32Value()); + } else if (size == 8) { + LoadFromOffset(kLoadDoubleword, scratch, SP, src.Int32Value()); + StoreToOffset(kStoreDoubleword, scratch, dest_base.AsMips64().AsGpuRegister(), + dest_offset.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, + ManagedRegister /*mscratch*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +} + +void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, + ManagedRegister src, Offset src_offset, + ManagedRegister mscratch, size_t size) { + GpuRegister scratch = mscratch.AsMips64().AsGpuRegister(); + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadFromOffset(kLoadWord, scratch, src.AsMips64().AsGpuRegister(), src_offset.Int32Value()); + StoreToOffset(kStoreWord, scratch, dest.AsMips64().AsGpuRegister(), dest_offset.Int32Value()); + } else if (size == 8) { + LoadFromOffset(kLoadDoubleword, scratch, src.AsMips64().AsGpuRegister(), + src_offset.Int32Value()); + 
StoreToOffset(kStoreDoubleword, scratch, dest.AsMips64().AsGpuRegister(), + dest_offset.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset +/*src_offset*/, + ManagedRegister /*mscratch*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +} + +void Mips64Assembler::MemoryBarrier(ManagedRegister) { + UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +} + +void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, bool null_allowed) { + Mips64ManagedRegister out_reg = mout_reg.AsMips64(); + Mips64ManagedRegister in_reg = min_reg.AsMips64(); + CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; + CHECK(out_reg.IsGpuRegister()) << out_reg; + if (null_allowed) { + Label null_arg; + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(), + SP, handle_scope_offset.Int32Value()); + in_reg = out_reg; + } + if (!out_reg.Equals(in_reg)) { + LoadImmediate64(out_reg.AsGpuRegister(), 0); + } + EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true); + AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Bind(&null_arg, false); + } else { + AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + } +} + +void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + Mips64ManagedRegister scratch = mscratch.AsMips64(); + CHECK(scratch.IsGpuRegister()) << scratch; + if (null_allowed) { + Label null_arg; + LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, + handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + EmitBranch(scratch.AsGpuRegister(), ZERO, &null_arg, true); + AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + Bind(&null_arg, false); + } else { + AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); + } + StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, out_off.Int32Value()); +} + +// Given a handle scope entry, load the associated reference. 
+void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
+                                                   ManagedRegister min_reg) {
+  // in_reg holds a handle scope entry; out_reg receives what that entry
+  // points at, or stays 0 when the entry itself is null.
+  Mips64ManagedRegister out_reg = mout_reg.AsMips64();
+  Mips64ManagedRegister in_reg = min_reg.AsMips64();
+  CHECK(out_reg.IsGpuRegister()) << out_reg;
+  CHECK(in_reg.IsGpuRegister()) << in_reg;
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    // Pre-load the null result; when both registers alias, in_reg is
+    // already 0 on the null path.
+    LoadImmediate64(out_reg.AsGpuRegister(), 0);
+  }
+  // Skip the dereference when the entry is null.
+  EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true);
+  LoadFromOffset(kLoadDoubleword, out_reg.AsGpuRegister(),
+                 in_reg.AsGpuRegister(), 0);
+  Bind(&null_arg, false);
+}
+
+void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+// Call *(base + offset), using scratch to hold the target address.
+void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
+  Mips64ManagedRegister base = mbase.AsMips64();
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(base.IsGpuRegister()) << base;
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 base.AsGpuRegister(), offset.Int32Value());
+  Jalr(scratch.AsGpuRegister());
+  // TODO: place reference map on call
+}
+
+void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  // Call *(*(SP + base) + offset)
+  // NOTE(review): the stack slot is read as a 32-bit word while the target is
+  // read as a doubleword - presumably the slot holds a 32-bit reference; confirm.
+  LoadFromOffset(kLoadWord, scratch.AsGpuRegister(),
+                 SP, base.Int32Value());
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 scratch.AsGpuRegister(), offset.Int32Value());
+  Jalr(scratch.AsGpuRegister());
+  // TODO: place reference map on call
+}
+
+void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
+  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+}
+
+// Copy the current-thread register (S1) into tr.
+void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
+  Move(tr.AsMips64().AsGpuRegister(), S1);
+}
+
+// Spill the current-thread register (S1) to the frame slot at SP + offset.
+void Mips64Assembler::GetCurrentThread(FrameOffset offset,
+                                       ManagedRegister /*mscratch*/) {
+  StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
+}
+
+// Load the thread's pending exception into scratch and branch to a queued
+// slow path when it is non-null.
+void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
+  buffer_.EnqueueSlowPath(slow);
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 S1, Thread::ExceptionOffset<8>().Int32Value());
+  EmitBranch(scratch.AsGpuRegister(), ZERO, slow->Entry(), false);
+}
+
+// Slow path for ExceptionPoll: undo any extra frame adjustment, pass the
+// exception (left in scratch_ by the poll) in A0 and jump to the thread's
+// pDeliverException entrypoint.
+void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
+  Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_, false);
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    __ DecreaseFrameSize(stack_adjust_);
+  }
+  // Pass exception object as argument
+  // Don't care about preserving A0 as this call won't return
+  __ Move(A0, scratch_.AsGpuRegister());
+  // Set up call to Thread::Current()->pDeliverException.
+  // The entrypoint offset must be computed for 8-byte pointers, matching the
+  // ThreadOffset<8> / Thread::ExceptionOffset<8>() offsets used by the rest
+  // of this assembler; a 4-byte layout would address the wrong Thread slot.
+  __ LoadFromOffset(kLoadDoubleword, T9, S1,
+                    QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+  __ Jr(T9);
+  // Call never returns
+  __ Break();
+#undef __
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
new file mode 100644
index 0000000000..36e74d7cb2
--- /dev/null
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
+#define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "constants_mips64.h"
+#include "globals.h"
+#include "managed_register_mips64.h"
+#include "utils/assembler.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace mips64 {
+
+// Operand kind passed to LoadFromOffset() / LoadFpuFromOffset().
+enum LoadOperandType {
+  kLoadSignedByte,
+  kLoadUnsignedByte,
+  kLoadSignedHalfword,
+  kLoadUnsignedHalfword,
+  kLoadWord,
+  kLoadDoubleword
+};
+
+// Operand kind passed to StoreToOffset() / StoreFpuToOffset().
+enum StoreOperandType {
+  kStoreByte,
+  kStoreHalfword,
+  kStoreWord,
+  kStoreDoubleword
+};
+
+// MIPS64 implementation of the common Assembler interface: individual
+// instruction emitters plus the overridden high-level frame, load/store,
+// copy, handle scope, call and exception-poll routines.
+class Mips64Assembler FINAL : public Assembler {
+ public:
+  Mips64Assembler() {}
+  virtual ~Mips64Assembler() {}
+
+  // Emit Machine Instructions.
+  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Mult(GpuRegister rs, GpuRegister rt);
+  void Multu(GpuRegister rs, GpuRegister rt);
+  void Div(GpuRegister rs, GpuRegister rt);
+  void Divu(GpuRegister rs, GpuRegister rt);
+
+  void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Or(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  void Sll(GpuRegister rd, GpuRegister rs, int shamt);
+  void Srl(GpuRegister rd, GpuRegister rs, int shamt);
+  void Sra(GpuRegister rd, GpuRegister rs, int shamt);
+  void Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lui(GpuRegister rt, uint16_t imm16);
+  void Mfhi(GpuRegister rd);
+  void Mflo(GpuRegister rd);
+
+  void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+
+  void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+
+  void Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void J(uint32_t address);
+  void Jal(uint32_t address);
+  void Jr(GpuRegister rs);
+  void Jalr(GpuRegister rs);
+
+  void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void MovS(FpuRegister fd, FpuRegister fs);
+  void MovD(FpuRegister fd, FpuRegister fs);
+
+  void Mfc1(GpuRegister rt, FpuRegister fs);
+  void Mtc1(FpuRegister ft, GpuRegister rs);
+  void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+  void Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
+
+  void Break();
+  void Nop();
+  void Move(GpuRegister rt, GpuRegister rs);
+  void Clear(GpuRegister rt);
+  void Not(GpuRegister rt, GpuRegister rs);
+  void Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Div(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+
+  // NOTE(review): despite the "64" suffix both helpers take a 32-bit value.
+  void AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value);
+  void LoadImmediate64(GpuRegister rt, int32_t value);
+
+  // Register <-> [base + offset] transfers; `type` selects the operand kind.
+  void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
+  void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
+  void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
+  void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
+  void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
+
+  // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
+  void Emit(int32_t value);
+  void EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal);
+  void EmitJump(Label* label, bool link);
+  void Bind(Label* label, bool is_jump);
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                  const std::vector<ManagedRegister>& callee_save_regs,
+                  const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
+
+  // Emit code that will remove an activation from the stack
+  void RemoveFrame(size_t frame_size,
+                   const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
+
+  void IncreaseFrameSize(size_t adjust) OVERRIDE;
+  void DecreaseFrameSize(size_t adjust) OVERRIDE;
+
+  // Store routines
+  void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
+  void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
+  void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
+
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
+
+  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
+                                ManagedRegister mscratch) OVERRIDE;
+
+  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+                                  ManagedRegister mscratch) OVERRIDE;
+
+  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+
+  void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
+                     ManagedRegister mscratch) OVERRIDE;
+
+  // Load routines
+  void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
+
+  void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+
+  void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
+
+  void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) OVERRIDE;
+
+  void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
+
+  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+
+  // Copying routines
+  void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
+
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
+                              ManagedRegister mscratch) OVERRIDE;
+
+  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+                            ManagedRegister mscratch) OVERRIDE;
+
+  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+            ManagedRegister mscratch, size_t size) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister) OVERRIDE;
+
+  // Sign extension
+  void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Zero extension
+  void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
+
+  // Exploit fast access in managed code to Thread::Current()
+  void GetCurrentThread(ManagedRegister tr) OVERRIDE;
+  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be NULL if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // NULL.
+  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
+                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be NULL if the
+  // value is null and null_allowed.
+  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
+                              mscratch, bool null_allowed) OVERRIDE;
+
+  // src holds a handle scope entry (Object**); load this into dst
+  void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
+  void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
+
+  // Call to address held at [base+offset]
+  void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
+  void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
+
+ private:
+  // Low-level encoders, one per instruction layout
+  // (presumably the MIPS R/I/J and FPU formats - confirm against the .cc).
+  void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
+  void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
+  void EmitJ(int opcode, int address);
+  void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
+  void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+
+  int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump);
+  int DecodeBranchOffset(int32_t inst, bool is_jump);
+
+  DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
+};
+
+// Slowpath entered when Thread::Current()->_exception is non-null
+class Mips64ExceptionSlowPath FINAL : public SlowPath {
+ public:
+  // scratch: register associated with the poll; stack_adjust: extra frame
+  // bytes recorded for the slow path (0 means none).
+  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm) OVERRIDE;
+ private:
+  const Mips64ManagedRegister scratch_;
+  const size_t stack_adjust_;
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
new file mode 100644
index 0000000000..8b7697cac3
--- /dev/null
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
+#define ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
+
+#include <iosfwd>
+
+#include "arch/mips64/registers_mips64.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "globals.h"
+
+namespace art {
+namespace mips64 {
+
+// Constants used for the decoding or encoding of the individual fields of instructions.
+// Each field is described by the bit position of its least significant bit
+// (k*Shift) and its width (k*Bits).
+enum InstructionFields {
+  kOpcodeShift = 26,
+  kOpcodeBits = 6,
+  kRsShift = 21,
+  kRsBits = 5,
+  kRtShift = 16,
+  kRtBits = 5,
+  kRdShift = 11,
+  kRdBits = 5,
+  kShamtShift = 6,
+  kShamtBits = 5,
+  kFunctShift = 0,
+  kFunctBits = 6,
+
+  kFmtShift = 21,
+  kFmtBits = 5,
+  kFtShift = 16,
+  kFtBits = 5,
+  kFsShift = 11,
+  kFsBits = 5,
+  kFdShift = 6,
+  kFdBits = 5,
+
+  kBranchOffsetMask = 0x0000ffff,
+  kJumpOffsetMask = 0x03ffffff,
+};
+
+enum ScaleFactor {
+  TIMES_1 = 0,
+  TIMES_2 = 1,
+  TIMES_4 = 2,
+  TIMES_8 = 3
+};
+
+// Overlay for one 32-bit instruction word located in a code stream.
+class Instr {
+ public:
+  // Opcode bits all zero with the function field set to 0x0D.
+  static const uint32_t kBreakPointInstruction = 0x0000000D;
+
+  // Returns true when this word is a breakpoint instruction.
+  // Only the opcode field (bits 31..26) and the function field (bits 5..0)
+  // are compared, so the middle bits may hold any value. The mask keeps all
+  // six function bits: with a narrower mask, words whose function field is
+  // 0x1D, 0x2D or 0x3D would also compare equal to 0x0D.
+  bool IsBreakPoint() {
+    return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC00003F) == kBreakPointInstruction;
+  }
+
+  // Instructions are read out of a code stream. The only way to get a
+  // reference to an instruction is to convert a pointer. There is no way
+  // to allocate or create instances of class Instr.
+  // Use the At(pc) function to create references to Instr.
+  static Instr* At(uintptr_t pc) { return reinterpret_cast<Instr*>(pc); }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Instr);
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_MIPS64_CONSTANTS_MIPS64_H_
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
new file mode 100644
index 0000000000..dea396e4a7
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+
+#include "globals.h"
+
+namespace art {
+namespace mips64 {
+
+// Two registers overlap only when both are real registers with the same id;
+// NoRegister overlaps nothing, not even another NoRegister.
+bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const {
+  if (IsNoRegister() || other.IsNoRegister()) {
+    return false;
+  }
+  CHECK(IsValidManagedRegister());
+  CHECK(other.IsValidManagedRegister());
+  return Equals(other);
+}
+
+// Writes a short description of the register: its kind and its number
+// within that kind.
+void Mips64ManagedRegister::Print(std::ostream& os) const {
+  if (!IsValidManagedRegister()) {
+    os << "No Register";
+    return;
+  }
+  if (IsGpuRegister()) {
+    os << "GPU: " << static_cast<int>(AsGpuRegister());
+    return;
+  }
+  if (IsFpuRegister()) {
+    os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+    return;
+  }
+  // Valid id that is neither kind: fall back to the raw register id.
+  os << "??: " << RegId();
+}
+
+// Stream insertion simply forwards to Print().
+std::ostream& operator<<(std::ostream& os, const Mips64ManagedRegister& reg) {
+  reg.Print(os);
+  return os;
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
new file mode 100644
index 0000000000..924a928389
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ +#define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ + +#include "constants_mips64.h" +#include "utils/managed_register.h" + +namespace art { +namespace mips64 { + +const int kNumberOfGpuRegIds = kNumberOfGpuRegisters; +const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters; + +const int kNumberOfFpuRegIds = kNumberOfFpuRegisters; +const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters; + +const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds; +const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds; + +// An instance of class 'ManagedRegister' represents a single GPU register (enum +// Register) or a double precision FP register (enum FpuRegister) +// 'ManagedRegister::NoRegister()' provides an invalid register. +// There is a one-to-one mapping between ManagedRegister and register id. +class Mips64ManagedRegister : public ManagedRegister { + public: + GpuRegister AsGpuRegister() const { + CHECK(IsGpuRegister()); + return static_cast<GpuRegister>(id_); + } + + FpuRegister AsFpuRegister() const { + CHECK(IsFpuRegister()); + return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds); + } + + bool IsGpuRegister() const { + CHECK(IsValidManagedRegister()); + return (0 <= id_) && (id_ < kNumberOfGpuRegIds); + } + + bool IsFpuRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - kNumberOfGpuRegIds; + return (0 <= test) && (test < kNumberOfFpuRegIds); + } + + void Print(std::ostream& os) const; + + // Returns true if the two managed-registers ('this' and 'other') overlap. + // Either managed-register may be the NoRegister. If both are the NoRegister + // then false is returned. 
+ bool Overlaps(const Mips64ManagedRegister& other) const; + + static Mips64ManagedRegister FromGpuRegister(GpuRegister r) { + CHECK_NE(r, kNoGpuRegister); + return FromRegId(r); + } + + static Mips64ManagedRegister FromFpuRegister(FpuRegister r) { + CHECK_NE(r, kNoFpuRegister); + return FromRegId(r + kNumberOfGpuRegIds); + } + + private: + bool IsValidManagedRegister() const { + return (0 <= id_) && (id_ < kNumberOfRegIds); + } + + int RegId() const { + CHECK(!IsNoRegister()); + return id_; + } + + int AllocId() const { + CHECK(IsValidManagedRegister()); + CHECK_LT(id_, kNumberOfAllocIds); + return id_; + } + + int AllocIdLow() const; + int AllocIdHigh() const; + + friend class ManagedRegister; + + explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + + static Mips64ManagedRegister FromRegId(int reg_id) { + Mips64ManagedRegister reg(reg_id); + CHECK(reg.IsValidManagedRegister()); + return reg; + } +}; + +std::ostream& operator<<(std::ostream& os, const Mips64ManagedRegister& reg); + +} // namespace mips64 + +inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const { + mips64::Mips64ManagedRegister reg(id_); + CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); + return reg; +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index d843a7213f..c7414a12fc 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -184,6 +184,20 @@ void X86_64Assembler::movl(const Address& dst, const Immediate& imm) { EmitImmediate(imm); } + +void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) { + cmov(c, dst, src, true); +} + +void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex()); 
+ EmitUint8(0x0F); + EmitUint8(0x40 + c); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + + void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalByteRegNormalizingRex32(dst, src); @@ -369,19 +383,26 @@ void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) { void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { + movd(dst, src, true); +} + +void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { + movd(dst, src, true); +} + +void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitRex64(dst, src); + EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex()); EmitUint8(0x0F); EmitUint8(0x6E); EmitOperand(dst.LowBits(), Operand(src)); } - -void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { +void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitRex64(src, dst); + EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex()); EmitUint8(0x0F); EmitUint8(0x7E); EmitOperand(src.LowBits(), Operand(dst)); @@ -826,6 +847,39 @@ void X86_64Assembler::andpd(XmmRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x54); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x54); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); 
+ EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x56); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x56); + EmitXmmRegisterOperand(dst.LowBits(), src); +} void X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1757,6 +1811,20 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) { EmitUint8(0xC0 + dst.LowBits()); } +void X86_64Assembler::bswapl(CpuRegister dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, false, false, false, dst.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0xC8 + dst.LowBits()); +} + +void X86_64Assembler::bswapq(CpuRegister dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, true, false, false, dst.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0xC8 + dst.LowBits()); +} + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index ac8bc9ab49..5c8d608cf2 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -276,6 +276,9 @@ class X86_64Assembler FINAL : public Assembler { void movl(const Address& dst, CpuRegister src); void movl(const Address& dst, const Immediate& imm); + void cmov(Condition c, CpuRegister dst, CpuRegister src); // This is the 64b version. 
+ void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit); + void movzxb(CpuRegister dst, CpuRegister src); void movzxb(CpuRegister dst, const Address& src); void movsxb(CpuRegister dst, CpuRegister src); @@ -303,8 +306,10 @@ class X86_64Assembler FINAL : public Assembler { void movsxd(CpuRegister dst, CpuRegister src); void movsxd(CpuRegister dst, const Address& src); - void movd(XmmRegister dst, CpuRegister src); - void movd(CpuRegister dst, XmmRegister src); + void movd(XmmRegister dst, CpuRegister src); // Note: this is the r64 version, formally movq. + void movd(CpuRegister dst, XmmRegister src); // Note: this is the r64 version, formally movq. + void movd(XmmRegister dst, CpuRegister src, bool is64bit); + void movd(CpuRegister dst, XmmRegister src, bool is64bit); void addss(XmmRegister dst, XmmRegister src); void addss(XmmRegister dst, const Address& src); @@ -360,6 +365,11 @@ class X86_64Assembler FINAL : public Assembler { void xorps(XmmRegister dst, XmmRegister src); void andpd(XmmRegister dst, const Address& src); + void andpd(XmmRegister dst, XmmRegister src); + void andps(XmmRegister dst, XmmRegister src); + + void orpd(XmmRegister dst, XmmRegister src); + void orps(XmmRegister dst, XmmRegister src); void flds(const Address& src); void fstps(const Address& dst); @@ -504,6 +514,9 @@ class X86_64Assembler FINAL : public Assembler { void setcc(Condition condition, CpuRegister dst); + void bswapl(CpuRegister dst); + void bswapq(CpuRegister dst); + // // Macros for High-level operations. 
// diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index b8d724d771..6df4144004 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -330,7 +330,6 @@ std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Ass assembler->shlq(*reg, shifter); str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; } - printf("%s\n", str.str().c_str()); return str.str(); } @@ -690,6 +689,22 @@ TEST_F(AssemblerX86_64Test, Xorpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } +TEST_F(AssemblerX86_64Test, Andps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andps, "andps %{reg2}, %{reg1}"), "andps"); +} + +TEST_F(AssemblerX86_64Test, Andpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd"); +} + +TEST_F(AssemblerX86_64Test, Orps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps"); +} + +TEST_F(AssemblerX86_64Test, Orpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd"); +} + // X87 std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, @@ -758,6 +773,14 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) { // MISC // ////////// +TEST_F(AssemblerX86_64Test, Bswapl) { + DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl"); +} + +TEST_F(AssemblerX86_64Test, Bswapq) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq"); +} + std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { // From Condition |