summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/builder.cc2
-rw-r--r--compiler/optimizing/code_generator.cc7
-rw-r--r--compiler/optimizing/code_generator.h4
-rw-r--r--compiler/optimizing/code_generator_arm.cc2
-rw-r--r--compiler/optimizing/code_generator_arm.h5
-rw-r--r--compiler/optimizing/code_generator_arm64.cc2
-rw-r--r--compiler/optimizing/code_generator_arm64.h5
-rw-r--r--compiler/optimizing/code_generator_x86.cc2
-rw-r--r--compiler/optimizing/code_generator_x86.h5
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc89
-rw-r--r--compiler/optimizing/code_generator_x86_64.h23
-rw-r--r--compiler/optimizing/inliner.cc4
-rw-r--r--compiler/optimizing/intrinsics.cc366
-rw-r--r--compiler/optimizing/intrinsics.h86
-rw-r--r--compiler/optimizing/intrinsics_list.h87
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc984
-rw-r--r--compiler/optimizing/intrinsics_x86_64.h83
-rw-r--r--compiler/optimizing/locations.cc7
-rw-r--r--compiler/optimizing/locations.h11
-rw-r--r--compiler/optimizing/nodes.h73
-rw-r--r--compiler/optimizing/optimizing_compiler.cc10
-rw-r--r--compiler/optimizing/optimizing_unit_test.h8
-rw-r--r--compiler/optimizing/parallel_move_test.cc11
-rw-r--r--compiler/optimizing/register_allocator.cc50
-rw-r--r--compiler/optimizing/register_allocator.h8
-rw-r--r--compiler/optimizing/register_allocator_test.cc102
-rw-r--r--compiler/optimizing/ssa_liveness_analysis.h4
27 files changed, 1939 insertions, 101 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f6ca6c740e..9c2facb75e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
HInvoke* invoke = nullptr;
if (optimized_invoke_type == kVirtual) {
invoke = new (arena_) HInvokeVirtual(
- arena_, number_of_arguments, return_type, dex_pc, table_index);
+ arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
} else if (optimized_invoke_type == kInterface) {
invoke = new (arena_) HInvokeInterface(
arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0c1ff9bff5..9e8907078b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
+ GetGraph()->GetTemporariesVRegSlots()
+ 1 /* filler */,
0, /* the baseline compiler does not have live registers at slow path */
+ 0, /* the baseline compiler does not have live registers at slow path */
GetGraph()->GetMaximumNumberOfOutVRegs()
+ 1 /* current method */);
GenerateFrameEntry();
@@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
}
void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
- size_t maximum_number_of_live_registers,
+ size_t maximum_number_of_live_core_registers,
+ size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots) {
first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
SetFrameSize(RoundUp(
number_of_spill_slots * kVRegSize
+ number_of_out_slots * kVRegSize
- + maximum_number_of_live_registers * GetWordSize()
+ + maximum_number_of_live_core_registers * GetWordSize()
+ + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+ FrameEntrySpillSize(),
kStackAlignment));
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 8d28f3da25..88e50b6c88 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -109,9 +109,11 @@ class CodeGenerator {
virtual HGraphVisitor* GetInstructionVisitor() = 0;
virtual Assembler* GetAssembler() = 0;
virtual size_t GetWordSize() const = 0;
+ virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
void ComputeFrameSize(size_t number_of_spill_slots,
- size_t maximum_number_of_live_registers,
+ size_t maximum_number_of_live_core_registers,
+ size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots);
virtual size_t FrameEntrySpillSize() const = 0;
int32_t GetStackSlot(HLocal* local) const;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1862061bcf..c4ba0fd3e5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1190,7 +1190,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
// temp = temp[index_in_cache]
__ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
// LR = temp[offset_of_quick_compiled_code]
__ LoadFromOffset(kLoadWord, LR, temp,
mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8b29b159ab..267d9a2cef 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -169,6 +169,11 @@ class CodeGeneratorARM : public CodeGenerator {
return kArmWordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // Allocated in S registers, which are word sized.
+ return kArmWordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b19f44e78..6d2c3de5d5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1975,7 +1975,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
// Make sure that ArtMethod* is passed in W0 as per the calling convention
DCHECK(temp.Is(w0));
size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
- invoke->GetIndexInDexCache() * kHeapRefSize;
+ invoke->GetDexMethodIndex() * kHeapRefSize;
// TODO: Implement all kinds of calls:
// 1) boot -> boot
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index e4da07be43..590bc1d778 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -191,6 +191,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
return kArm64WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // Allocated in D registers, which are word sized.
+ return kArm64WordSize;
+ }
+
uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
vixl::Label* block_entry_label = GetLabelOf(block);
DCHECK(block_entry_label->IsBound());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04e36cc58a..1a0df44ea6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1135,7 +1135,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
// temp = temp->dex_cache_resolved_methods_;
__ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
// temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
// (temp + offset_of_quick_compiled_code)()
__ call(Address(
temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acde122917..2d8adb2cf1 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -166,6 +166,11 @@ class CodeGeneratorX86 : public CodeGenerator {
return kX86WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // 8 bytes == 2 words for each spill.
+ return 2 * kX86WordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5fc24f71e6..3d7f122d36 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,8 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86_64.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
@@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR
#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
- SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
-
- private:
- Label entry_label_;
- Label exit_label_;
-
- DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
public:
explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
@@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) {
return kEqual;
}
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ CpuRegister temp) {
+ // All registers are assumed to be correctly set up.
+
+ // TODO: Implement all kinds of calls:
+ // 1) boot -> boot
+ // 2) app -> boot
+ // 3) app -> app
+ //
+ // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+ // temp = method;
+ LoadCurrentMethod(temp);
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86_64WordSize).SizeValue()));
+
+ DCHECK(!IsLeafMethod());
+ RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
}
@@ -1123,30 +1136,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
-void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Implement all kinds of calls:
- // 1) boot -> boot
- // 2) app -> boot
- // 3) app -> app
- //
- // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
- // temp = method;
- codegen_->LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
- // (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
- DCHECK(!codegen_->IsLeafMethod());
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->GenerateStaticOrDirectCall(
+ invoke,
+ invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
}
void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -1182,10 +1196,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
+
CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 87f6b0f779..c501568a89 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -36,6 +36,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] =
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
+static constexpr bool kCoalescedImplicitNullCheck = false;
+
class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
InvokeDexCallingConvention() : CallingConvention(
@@ -67,7 +69,20 @@ class InvokeDexCallingConventionVisitor {
};
class CodeGeneratorX86_64;
-class SlowPathCodeX86_64;
+
+class SlowPathCodeX86_64 : public SlowPathCode {
+ public:
+ SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
+
+ Label* GetEntryLabel() { return &entry_label_; }
+ Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Label entry_label_;
+ Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
+};
class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
public:
@@ -169,6 +184,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return kX86_64WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ return kX86_64WordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -222,6 +241,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return false;
}
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 493d93f052..532167c179 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -44,10 +44,10 @@ void HInliner::Run() {
instr_it.Advance()) {
HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect();
if (current != nullptr) {
- if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) {
+ if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) {
if (kIsDebugBuild) {
std::string callee_name =
- PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile());
+ PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
bool should_inline = callee_name.find("$inline$") != std::string::npos;
CHECK(!should_inline) << "Could not inline " << callee_name;
}
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
new file mode 100644
index 0000000000..fe0e7f2eb2
--- /dev/null
+++ b/compiler/optimizing/intrinsics.cc
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
+#include "invoke_type.h"
+#include "nodes.h"
+#include "quick/inline_method_analyser.h"
+
+namespace art {
+
+// Function that returns whether an intrinsic is static/direct or virtual.
+static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
+ switch (i) {
+ case Intrinsics::kNone:
+ return kInterface; // Non-sensical for intrinsic.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ return IsStatic;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return kInterface;
+}
+
+
+
+static Primitive::Type GetType(uint64_t data, bool is_op_size) {
+ if (is_op_size) {
+ switch (static_cast<OpSize>(data)) {
+ case kSignedByte:
+ return Primitive::Type::kPrimByte;
+ case kSignedHalf:
+ return Primitive::Type::kPrimShort;
+ case k32:
+ return Primitive::Type::kPrimInt;
+ case k64:
+ return Primitive::Type::kPrimLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << data;
+ UNREACHABLE();
+ }
+ } else {
+ if ((data & kIntrinsicFlagIsLong) != 0) {
+ return Primitive::Type::kPrimLong;
+ }
+ if ((data & kIntrinsicFlagIsObject) != 0) {
+ return Primitive::Type::kPrimNot;
+ }
+ return Primitive::Type::kPrimInt;
+ }
+}
+
+static Intrinsics GetIntrinsic(InlineMethod method) {
+ switch (method.opcode) {
+ // Floating-point conversions.
+ case kIntrinsicDoubleCvt:
+ return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+ Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble;
+ case kIntrinsicFloatCvt:
+ return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+ Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
+
+ // Bit manipulations.
+ case kIntrinsicReverseBits:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kIntegerReverse;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kLongReverse;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ case kIntrinsicReverseBytes:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kShortReverseBytes;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kIntegerReverseBytes;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kLongReverseBytes;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // Abs.
+ case kIntrinsicAbsDouble:
+ return Intrinsics::kMathAbsDouble;
+ case kIntrinsicAbsFloat:
+ return Intrinsics::kMathAbsFloat;
+ case kIntrinsicAbsInt:
+ return Intrinsics::kMathAbsInt;
+ case kIntrinsicAbsLong:
+ return Intrinsics::kMathAbsLong;
+
+ // Min/max.
+ case kIntrinsicMinMaxDouble:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble;
+ case kIntrinsicMinMaxFloat:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat;
+ case kIntrinsicMinMaxInt:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt;
+ case kIntrinsicMinMaxLong:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
+
+ // Misc math.
+ case kIntrinsicSqrt:
+ return Intrinsics::kMathSqrt;
+ case kIntrinsicCeil:
+ return Intrinsics::kMathCeil;
+ case kIntrinsicFloor:
+ return Intrinsics::kMathFloor;
+ case kIntrinsicRint:
+ return Intrinsics::kMathRint;
+ case kIntrinsicRoundDouble:
+ return Intrinsics::kMathRoundDouble;
+ case kIntrinsicRoundFloat:
+ return Intrinsics::kMathRoundFloat;
+
+ // System.arraycopy.
+ case kIntrinsicSystemArrayCopyCharArray:
+ return Intrinsics::kSystemArrayCopyChar;
+
+ // Thread.currentThread.
+ case kIntrinsicCurrentThread:
+ return Intrinsics::kThreadCurrentThread;
+
+ // Memory.peek.
+ case kIntrinsicPeek:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimByte:
+ return Intrinsics::kMemoryPeekByte;
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kMemoryPeekShortNative;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kMemoryPeekIntNative;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kMemoryPeekLongNative;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // Memory.poke.
+ case kIntrinsicPoke:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimByte:
+ return Intrinsics::kMemoryPokeByte;
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kMemoryPokeShortNative;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kMemoryPokeIntNative;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kMemoryPokeLongNative;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // String.
+ case kIntrinsicCharAt:
+ return Intrinsics::kStringCharAt;
+ case kIntrinsicCompareTo:
+ return Intrinsics::kStringCompareTo;
+ case kIntrinsicIsEmptyOrLength:
+ return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+ Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
+ case kIntrinsicIndexOf:
+ return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
+ Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+
+ case kIntrinsicCas:
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimNot:
+ return Intrinsics::kUnsafeCASObject;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kUnsafeCASInt;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kUnsafeCASLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ case kIntrinsicUnsafeGet: {
+ const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimInt:
+ return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet;
+ case Primitive::Type::kPrimLong:
+ return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ }
+ case kIntrinsicUnsafePut: {
+ enum Sync { kNoSync, kVolatile, kOrdered };
+ const Sync sync =
+ ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile :
+ ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered :
+ kNoSync;
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimInt:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePut;
+ case kVolatile:
+ return Intrinsics::kUnsafePutVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutOrdered;
+ }
+ break;
+ case Primitive::Type::kPrimLong:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePutLong;
+ case kVolatile:
+ return Intrinsics::kUnsafePutLongVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutLongOrdered;
+ }
+ break;
+ case Primitive::Type::kPrimNot:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePutObject;
+ case kVolatile:
+ return Intrinsics::kUnsafePutObjectVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutObjectOrdered;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ }
+
+ // Virtual cases.
+
+ case kIntrinsicReferenceGetReferent:
+ return Intrinsics::kReferenceGetReferent;
+
+ // Quick inliner cases. Remove after refactoring. They are here so that we can use the
+ // compiler to warn on missing cases.
+
+ case kInlineOpNop:
+ case kInlineOpReturnArg:
+ case kInlineOpNonWideConst:
+ case kInlineOpIGet:
+ case kInlineOpIPut:
+ return Intrinsics::kNone;
+
+ // No default case to make the compiler warn on missing cases.
+ }
+ return Intrinsics::kNone;
+}
+
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
+ // The DexFileMethodInliner should have checked whether the methods are agreeing with
+ // what we expect, i.e., static methods are called as such. Add another check here for
+ // our expectations:
+ // Whenever the intrinsic is marked as static-or-direct, report an error if we find an
+ // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual
+ // functions that will perform a check inline. If the precise type is known, however,
+ // the instruction will be sharpened to an InvokeStaticOrDirect.
+ InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
+ InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
+ invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+ invoke->IsInvokeVirtual() ? kVirtual : kSuper;
+ switch (intrinsic_type) {
+ case kStatic:
+ return (invoke_type == kStatic);
+ case kDirect:
+ return (invoke_type == kDirect);
+ case kVirtual:
+ // Call might be devirtualized.
+ return (invoke_type == kVirtual || invoke_type == kDirect);
+
+ default:
+ return false;
+ }
+}
+
+// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
+void IntrinsicsRecognizer::Run() {
+ DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_);
+ DCHECK(inliner != nullptr);
+
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+ inst_it.Advance()) {
+ HInstruction* inst = inst_it.Current();
+ if (inst->IsInvoke()) {
+ HInvoke* invoke = inst->AsInvoke();
+ InlineMethod method;
+ if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
+ Intrinsics intrinsic = GetIntrinsic(method);
+
+ if (intrinsic != Intrinsics::kNone) {
+ if (!CheckInvokeType(intrinsic, invoke)) {
+ LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
+ << intrinsic << " for "
+ << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_);
+ } else {
+ invoke->SetIntrinsic(intrinsic);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
+ switch (intrinsic) {
+ case Intrinsics::kNone:
+ os << "No intrinsic.";
+ break;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ os << # Name; \
+ break;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef STATIC_INTRINSICS_LIST
+#undef VIRTUAL_INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return os;
+}
+
+} // namespace art
+
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
new file mode 100644
index 0000000000..29cc8efcc3
--- /dev/null
+++ b/compiler/optimizing/intrinsics.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+// Recognize intrinsics from HInvoke nodes.
+class IntrinsicsRecognizer : public HOptimization {
+ public:
+ IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+ : HOptimization(graph, true, "intrinsics_recognition"),
+ dex_file_(dex_file), driver_(driver) {}
+
+ void Run() OVERRIDE;
+
+ private:
+ const DexFile* dex_file_;
+ CompilerDriver* driver_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
+};
+
+class IntrinsicVisitor : public ValueObject {
+ public:
+ virtual ~IntrinsicVisitor() {}
+
+ // Dispatch logic.
+
+ void Dispatch(HInvoke* invoke) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kNone:
+ return;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ Visit ## Name(invoke); \
+ return;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Do not put a default case. That way the compiler will complain if we missed a case.
+ }
+ }
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+ }
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ protected:
+ IntrinsicVisitor() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
new file mode 100644
index 0000000000..29ca20cca0
--- /dev/null
+++ b/compiler/optimizing/intrinsics_list.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+
+// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected
+// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual).
+
+#define INTRINSICS_LIST(V) \
+ V(DoubleDoubleToRawLongBits, kStatic) \
+ V(DoubleLongBitsToDouble, kStatic) \
+ V(FloatFloatToRawIntBits, kStatic) \
+ V(FloatIntBitsToFloat, kStatic) \
+ V(IntegerReverse, kStatic) \
+ V(IntegerReverseBytes, kStatic) \
+ V(LongReverse, kStatic) \
+ V(LongReverseBytes, kStatic) \
+ V(ShortReverseBytes, kStatic) \
+ V(MathAbsDouble, kStatic) \
+ V(MathAbsFloat, kStatic) \
+ V(MathAbsLong, kStatic) \
+ V(MathAbsInt, kStatic) \
+ V(MathMinDoubleDouble, kStatic) \
+ V(MathMinFloatFloat, kStatic) \
+ V(MathMinLongLong, kStatic) \
+ V(MathMinIntInt, kStatic) \
+ V(MathMaxDoubleDouble, kStatic) \
+ V(MathMaxFloatFloat, kStatic) \
+ V(MathMaxLongLong, kStatic) \
+ V(MathMaxIntInt, kStatic) \
+ V(MathSqrt, kStatic) \
+ V(MathCeil, kStatic) \
+ V(MathFloor, kStatic) \
+ V(MathRint, kStatic) \
+ V(MathRoundDouble, kStatic) \
+ V(MathRoundFloat, kStatic) \
+ V(SystemArrayCopyChar, kStatic) \
+ V(ThreadCurrentThread, kStatic) \
+ V(MemoryPeekByte, kStatic) \
+ V(MemoryPeekIntNative, kStatic) \
+ V(MemoryPeekLongNative, kStatic) \
+ V(MemoryPeekShortNative, kStatic) \
+ V(MemoryPokeByte, kStatic) \
+ V(MemoryPokeIntNative, kStatic) \
+ V(MemoryPokeLongNative, kStatic) \
+ V(MemoryPokeShortNative, kStatic) \
+ V(StringCharAt, kDirect) \
+ V(StringCompareTo, kDirect) \
+ V(StringIsEmpty, kDirect) \
+ V(StringIndexOf, kDirect) \
+ V(StringIndexOfAfter, kDirect) \
+ V(StringLength, kDirect) \
+ V(UnsafeCASInt, kDirect) \
+ V(UnsafeCASLong, kDirect) \
+ V(UnsafeCASObject, kDirect) \
+ V(UnsafeGet, kDirect) \
+ V(UnsafeGetVolatile, kDirect) \
+ V(UnsafeGetLong, kDirect) \
+ V(UnsafeGetLongVolatile, kDirect) \
+ V(UnsafePut, kDirect) \
+ V(UnsafePutOrdered, kDirect) \
+ V(UnsafePutVolatile, kDirect) \
+ V(UnsafePutObject, kDirect) \
+ V(UnsafePutObjectOrdered, kDirect) \
+ V(UnsafePutObjectVolatile, kDirect) \
+ V(UnsafePutLong, kDirect) \
+ V(UnsafePutLongOrdered, kDirect) \
+ V(UnsafePutLongVolatile, kDirect) \
+ \
+ V(ReferenceGetReferent, kVirtual)
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
new file mode 100644
index 0000000000..c1f4c94b7f
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -0,0 +1,984 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86_64.h"
+
+#include "code_generator_x86_64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/constants_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+static constexpr bool kIntrinsified = true;
+
+X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
+ return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ const LocationSummary* res = invoke->GetLocations();
+ return res != nullptr && res->Intrinsified();
+}
+
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+// TODO: trg as memory.
+static void MoveFromReturnRegister(Location trg,
+ Primitive::Type type,
+ CodeGeneratorX86_64* codegen) {
+ if (!trg.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+ if (trg_reg.AsRegister() != RAX) {
+ __ movl(trg_reg, CpuRegister(RAX));
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+ if (trg_reg.AsRegister() != RAX) {
+ __ movq(trg_reg, CpuRegister(RAX));
+ }
+ break;
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected void type for valid location " << trg;
+ UNREACHABLE();
+
+ case Primitive::kPrimDouble: {
+ XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+ if (trg_reg.AsFloatRegister() != XMM0) {
+ __ movsd(trg_reg, XmmRegister(XMM0));
+ }
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+ if (trg_reg.AsFloatRegister() != XMM0) {
+ __ movss(trg_reg, XmmRegister(XMM0));
+ }
+ break;
+ }
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr));
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+ explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
+ __ Bind(GetEntryLabel());
+
+ codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+ MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ } else {
+ UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+ UNREACHABLE();
+ }
+
+ // Copy the result back to the expected output.
+ Location out = invoke_->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+ }
+
+ codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The instruction where this slow path is happening.
+ HInvoke* const invoke_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
+};
+
+#undef __
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+ Primitive::Type size,
+ X86_64Assembler* assembler) {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ switch (size) {
+ case Primitive::kPrimShort:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(out);
+ __ sarl(out, Immediate(16));
+ break;
+ case Primitive::kPrimInt:
+ __ bswapl(out);
+ break;
+ case Primitive::kPrimLong:
+ __ bswapq(out);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+ UNREACHABLE();
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+// need is 64b.
+
+static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ // TODO: Enable memory operations when the assembler supports them.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+ // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location output = locations->Out();
+ CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (output.IsFpuRegister()) {
+ // In-register
+ XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+ if (is64bit) {
+ __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+ __ movd(xmm_temp, cpu_temp);
+ __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+ } else {
+ __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
+ __ movd(xmm_temp, cpu_temp);
+ __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+ }
+ } else {
+ // TODO: update when assember support is available.
+ UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+// Once assembler support is available, in-memory operations look like this:
+// if (is64bit) {
+// DCHECK(output.IsDoubleStackSlot());
+// // No 64b and with literal.
+// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
+// } else {
+// DCHECK(output.IsStackSlot());
+// // Can use and with a literal directly.
+// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
+// }
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+ CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+ CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location output = locations->Out();
+ CpuRegister out = output.AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (is64bit) {
+ // Create mask.
+ __ movq(mask, out);
+ __ sarq(mask, Immediate(63));
+ // Add mask.
+ __ addq(out, mask);
+ __ xorq(out, mask);
+ } else {
+ // Create mask.
+ __ movl(mask, out);
+ __ sarl(mask, Immediate(31));
+ // Add mask.
+ __ addl(out, mask);
+ __ xorl(out, mask);
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+ X86_64Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ Label nan, done, op2_label;
+ if (is_double) {
+ __ ucomisd(out, op2);
+ } else {
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (is_double) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (is_double) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
+ if (is_double) {
+ __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ }
+ __ movd(out, cpu_temp, is_double);
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (is_double) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+ // the second input to be the output (we can simply swap inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+ X86_64Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ if (is_long) {
+ __ cmpq(out, op2);
+ } else {
+ __ cmpl(out, op2);
+ }
+
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+ GetAssembler()->sqrtsd(out, in);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
+ // The inputs plus one temp.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Location of reference to data array
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ // Starting offset within data array
+ const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+ // Start of char data with array_
+ const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+ CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location temp_loc = locations->GetTemp(0);
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+ // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we
+ // move to (coalesced) implicit checks, we have to do a null check below.
+ DCHECK(!kCoalescedImplicitNullCheck);
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+ // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+ // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ X86_64Assembler* assembler = GetAssembler();
+
+ __ cmpl(idx, Address(obj, count_offset));
+ __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+ // Get the actual element.
+ __ movl(temp, idx); // temp := idx.
+ __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
+ __ movl(out, Address(obj, value_offset)); // obj := obj.array.
+ // out = out[2*temp].
+ __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+ CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
+ // x86 allows unaligned access. We do not have to check the input or use specific instructions
+ // to avoid a SIGBUS.
+ switch (size) {
+ case Primitive::kPrimByte:
+ __ movsxb(out, Address(address, 0));
+ break;
+ case Primitive::kPrimShort:
+ __ movsxw(out, Address(address, 0));
+ break;
+ case Primitive::kPrimInt:
+ __ movl(out, Address(address, 0));
+ break;
+ case Primitive::kPrimLong:
+ __ movq(out, Address(address, 0));
+ break;
+ default:
+ LOG(FATAL) << "Type not recognized for peek: " << size;
+ UNREACHABLE();
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+ CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+ // x86 allows unaligned access. We do not have to check the input or use specific instructions
+ // to avoid a SIGBUS.
+ switch (size) {
+ case Primitive::kPrimByte:
+ __ movb(Address(address, 0), value);
+ break;
+ case Primitive::kPrimShort:
+ __ movw(Address(address, 0), value);
+ break;
+ case Primitive::kPrimInt:
+ __ movl(Address(address, 0), value);
+ break;
+ case Primitive::kPrimLong:
+ __ movq(Address(address, 0), value);
+ break;
+ default:
+ LOG(FATAL) << "Type not recognized for poke: " << size;
+ UNREACHABLE();
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+ CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+ GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+}
+
+static void GenUnsafeGet(LocationSummary* locations, bool is_long,
+ bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+
+ if (is_long) {
+ __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ } else {
+ // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object
+ // pointer will entail an unpack operation.
+ __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+ Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+
+// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
+// memory model.
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
+ CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+ } else {
+ __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+ }
+
+ if (is_volatile) {
+ __ mfence();
+ }
+
+ if (type == Primitive::kPrimNot) {
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+ locations->GetTemp(1).AsRegister<CpuRegister>(),
+ base,
+ value);
+ }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace x86_64
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
new file mode 100644
index 0000000000..c1fa99c2dc
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace x86_64 {
+
+class CodeGeneratorX86_64;
+class X86_64Assembler;
+
+class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* arena_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
+};
+
+class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ X86_64Assembler* GetAssembler();
+
+ ArenaAllocator* GetArena();
+
+ CodeGeneratorX86_64* codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64);
+};
+
+} // namespace x86_64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ed5e260a5b..9f2f9ece85 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -20,7 +20,9 @@
namespace art {
-LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
+LocationSummary::LocationSummary(HInstruction* instruction,
+ CallKind call_kind,
+ bool intrinsified)
: inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
environment_(instruction->GetBlock()->GetGraph()->GetArena(),
@@ -29,7 +31,8 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
call_kind_(call_kind),
stack_mask_(nullptr),
register_mask_(0),
- live_registers_() {
+ live_registers_(),
+ intrinsified_(intrinsified) {
inputs_.SetSize(instruction->InputCount());
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7df99d4b6f..d41b3aecfd 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -463,7 +463,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
kCall
};
- LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
+ LocationSummary(HInstruction* instruction,
+ CallKind call_kind = kNoCall,
+ bool intrinsified = false);
void SetInAt(uint32_t at, Location location) {
DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid());
@@ -574,6 +576,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
return output_overlaps_;
}
+ bool Intrinsified() const {
+ return intrinsified_;
+ }
+
private:
GrowableArray<Location> inputs_;
GrowableArray<Location> temps_;
@@ -593,6 +599,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
// Registers that are in use at this position.
RegisterSet live_registers_;
+ // Whether these are locations for an intrinsified call.
+ const bool intrinsified_;
+
ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b98bc70a9f..f2aa043849 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1580,19 +1580,18 @@ class HLongConstant : public HConstant {
DISALLOW_COPY_AND_ASSIGN(HLongConstant);
};
+enum class Intrinsics {
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name,
+#include "intrinsics_list.h"
+ kNone,
+ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+};
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
+
class HInvoke : public HInstruction {
public:
- HInvoke(ArenaAllocator* arena,
- uint32_t number_of_arguments,
- Primitive::Type return_type,
- uint32_t dex_pc)
- : HInstruction(SideEffects::All()),
- inputs_(arena, number_of_arguments),
- return_type_(return_type),
- dex_pc_(dex_pc) {
- inputs_.SetSize(number_of_arguments);
- }
-
virtual size_t InputCount() const { return inputs_.Size(); }
virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
@@ -1612,12 +1611,38 @@ class HInvoke : public HInstruction {
uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexMethodIndex() const { return dex_method_index_; }
+
+ Intrinsics GetIntrinsic() {
+ return intrinsic_;
+ }
+
+ void SetIntrinsic(Intrinsics intrinsic) {
+ intrinsic_ = intrinsic;
+ }
+
DECLARE_INSTRUCTION(Invoke);
protected:
+ HInvoke(ArenaAllocator* arena,
+ uint32_t number_of_arguments,
+ Primitive::Type return_type,
+ uint32_t dex_pc,
+ uint32_t dex_method_index)
+ : HInstruction(SideEffects::All()),
+ inputs_(arena, number_of_arguments),
+ return_type_(return_type),
+ dex_pc_(dex_pc),
+ dex_method_index_(dex_method_index),
+ intrinsic_(Intrinsics::kNone) {
+ inputs_.SetSize(number_of_arguments);
+ }
+
GrowableArray<HInstruction*> inputs_;
const Primitive::Type return_type_;
const uint32_t dex_pc_;
+ const uint32_t dex_method_index_;
+ Intrinsics intrinsic_;
private:
DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -1629,19 +1654,16 @@ class HInvokeStaticOrDirect : public HInvoke {
uint32_t number_of_arguments,
Primitive::Type return_type,
uint32_t dex_pc,
- uint32_t index_in_dex_cache,
+ uint32_t dex_method_index,
InvokeType invoke_type)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
- index_in_dex_cache_(index_in_dex_cache),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
invoke_type_(invoke_type) {}
- uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
InvokeType GetInvokeType() const { return invoke_type_; }
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
private:
- const uint32_t index_in_dex_cache_;
const InvokeType invoke_type_;
DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
@@ -1653,8 +1675,9 @@ class HInvokeVirtual : public HInvoke {
uint32_t number_of_arguments,
Primitive::Type return_type,
uint32_t dex_pc,
+ uint32_t dex_method_index,
uint32_t vtable_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
vtable_index_(vtable_index) {}
uint32_t GetVTableIndex() const { return vtable_index_; }
@@ -1675,8 +1698,7 @@ class HInvokeInterface : public HInvoke {
uint32_t dex_pc,
uint32_t dex_method_index,
uint32_t imt_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
- dex_method_index_(dex_method_index),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
imt_index_(imt_index) {}
uint32_t GetImtIndex() const { return imt_index_; }
@@ -1685,7 +1707,6 @@ class HInvokeInterface : public HInvoke {
DECLARE_INSTRUCTION(InvokeInterface);
private:
- const uint32_t dex_method_index_;
const uint32_t imt_index_;
DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
@@ -2775,10 +2796,16 @@ class HParallelMove : public HTemplateInstruction<0> {
: HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {}
void AddMove(MoveOperands* move) {
- if (kIsDebugBuild && move->GetInstruction() != nullptr) {
+ if (kIsDebugBuild) {
+ if (move->GetInstruction() != nullptr) {
+ for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+ DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction())
+ << "Doing parallel moves for the same instruction.";
+ }
+ }
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction())
- << "Doing parallel moves for the same instruction.";
+ DCHECK(!move->GetDestination().Contains(moves_.Get(i)->GetDestination()))
+ << "Same destination for two moves in a parallel move.";
}
}
moves_.Add(move);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 692d452f54..605637300f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
#include "compiler.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "elf_writer_quick.h"
@@ -32,6 +33,7 @@
#include "gvn.h"
#include "inliner.h"
#include "instruction_simplifier.h"
+#include "intrinsics.h"
#include "jni/quick/jni_compiler.h"
#include "mirror/art_method-inl.h"
#include "nodes.h"
@@ -149,11 +151,12 @@ void OptimizingCompiler::Init() {
// Enable C1visualizer output. Must be done in Init() because the compiler
// driver is not fully initialized when passed to the compiler's constructor.
CompilerDriver* driver = GetCompilerDriver();
- if (driver->GetDumpPasses()) {
+ const std::string cfg_file_name = driver->GetDumpCfgFileName();
+ if (!cfg_file_name.empty()) {
CHECK_EQ(driver->GetThreadCount(), 1U)
<< "Graph visualizer requires the compiler to run single-threaded. "
<< "Invoke the compiler with '-j1'.";
- visualizer_output_.reset(new std::ofstream("art.cfg"));
+ visualizer_output_.reset(new std::ofstream(cfg_file_name));
}
}
@@ -214,9 +217,12 @@ static void RunOptimizations(HGraph* graph,
BoundsCheckElimination bce(graph);
InstructionSimplifier simplify2(graph);
+ IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
+
HOptimization* optimizations[] = {
&redundant_phi,
&dead_phi,
+ &intrinsics,
&dce,
&fold,
&simplify1,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 04b56345c4..b3eb1e2d51 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -45,8 +45,12 @@ namespace art {
LiveInterval* BuildInterval(const size_t ranges[][2],
size_t number_of_ranges,
ArenaAllocator* allocator,
- int reg = -1) {
- LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt);
+ int reg = -1,
+ HInstruction* defined_by = nullptr) {
+ LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by);
+ if (defined_by != nullptr) {
+ defined_by->SetLiveInterval(interval);
+ }
for (size_t i = number_of_ranges; i > 0; --i) {
interval->AddRange(ranges[i - 1][0], ranges[i - 1][1]);
}
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 210f7d7f09..7ab41b6ddc 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -120,16 +120,9 @@ TEST(ParallelMoveTest, Swap) {
{
TestParallelMoveResolver resolver(&allocator);
- static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}};
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str());
- }
-
- {
- TestParallelMoveResolver resolver(&allocator);
- static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}};
- resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
}
}
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1efc52b9ec..93ed44ee44 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -56,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
reserved_out_slots_(0),
- maximum_number_of_live_registers_(0) {
+ maximum_number_of_live_core_registers_(0),
+ maximum_number_of_live_fp_registers_(0) {
codegen->SetupBlockedRegisters();
physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
@@ -185,9 +186,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
}
LinearScan();
- size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
- maximum_number_of_live_registers_ = 0;
-
inactive_.Reset();
active_.Reset();
handled_.Reset();
@@ -207,7 +205,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
}
}
LinearScan();
- maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
}
void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -602,8 +599,13 @@ void RegisterAllocator::LinearScan() {
if (current->IsSlowPathSafepoint()) {
// Synthesized interval to record the maximum number of live registers
// at safepoints. No need to allocate a register for it.
- maximum_number_of_live_registers_ =
- std::max(maximum_number_of_live_registers_, active_.Size());
+ if (processing_core_registers_) {
+ maximum_number_of_live_core_registers_ =
+ std::max(maximum_number_of_live_core_registers_, active_.Size());
+ } else {
+ maximum_number_of_live_fp_registers_ =
+ std::max(maximum_number_of_live_fp_registers_, active_.Size());
+ }
DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
continue;
}
@@ -855,7 +857,9 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
DCHECK(!active->IsFixed());
LiveInterval* split = Split(active, current->GetStart());
active_.DeleteAt(i);
- handled_.Add(active);
+ if (split != active) {
+ handled_.Add(active);
+ }
AddSorted(unhandled_, split);
break;
}
@@ -876,9 +880,14 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
if (next_intersection != kNoLifetime) {
if (inactive->IsFixed()) {
LiveInterval* split = Split(current, next_intersection);
+ DCHECK_NE(split, current);
AddSorted(unhandled_, split);
} else {
- LiveInterval* split = Split(inactive, next_intersection);
+ // Split at the start of `current`, which will lead to splitting
+ // at the end of the lifetime hole of `inactive`.
+ LiveInterval* split = Split(inactive, current->GetStart());
+ // If it's inactive, it must start before the current interval.
+ DCHECK_NE(split, inactive);
inactive_.DeleteAt(i);
--i;
--e;
@@ -1215,10 +1224,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
locations->SetEnvironmentAt(use->GetInputIndex(), source);
} else {
Location expected_location = locations->InAt(use->GetInputIndex());
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(use->GetUser(), source, expected_location);
+ // The expected (actual) location may be invalid in case the input is unused. Currently
+ // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
}
}
use = use->GetNext();
@@ -1255,8 +1271,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
switch (source.GetKind()) {
case Location::kRegister: {
locations->AddLiveRegister(source);
- DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
-
+ DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+ maximum_number_of_live_core_registers_ +
+ maximum_number_of_live_fp_registers_);
if (current->GetType() == Primitive::kPrimNot) {
locations->SetRegisterBit(source.reg());
}
@@ -1349,7 +1366,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
void RegisterAllocator::Resolve() {
codegen_->ComputeFrameSize(
- spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
+ spill_slots_.Size(), maximum_number_of_live_core_registers_,
+ maximum_number_of_live_fp_registers_, reserved_out_slots_);
// Adjust the Out Location of instructions.
// TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index c152a8bf67..ec46a776b5 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -190,10 +190,14 @@ class RegisterAllocator {
// Slots reserved for out arguments.
size_t reserved_out_slots_;
- // The maximum live registers at safepoints.
- size_t maximum_number_of_live_registers_;
+ // The maximum live core registers at safepoints.
+ size_t maximum_number_of_live_core_registers_;
+
+ // The maximum live FP registers at safepoints.
+ size_t maximum_number_of_live_fp_registers_;
ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
};
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index c2ea80ec33..0948643355 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -738,4 +738,106 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
}
}
+// Test a bug in the register allocator, where allocating a blocked
+// register would lead to spilling an inactive interval at the wrong
+// position.
+TEST(RegisterAllocatorTest, SpillInactive) {
+ ArenaPool pool;
+
+ // Create a synthesized graph to please the register_allocator and
+ // ssa_liveness_analysis code.
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ entry->AddInstruction(one);
+ entry->AddInstruction(two);
+ entry->AddInstruction(three);
+ entry->AddInstruction(four);
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(new (&allocator) HExit());
+
+ // We create a synthesized user requesting a register, to avoid just spilling the
+ // intervals.
+ HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt);
+ user->AddInput(one);
+ user->SetBlock(block);
+ LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ static constexpr size_t phi_ranges[][2] = {{20, 30}};
+ BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user);
+
+ // Create an interval with lifetime holes.
+ static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
+ LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
+ first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_);
+ first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_);
+ first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+
+ locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+ first = first->SplitAt(1);
+
+ // Create an interval that conflicts with the next interval, to force the next
+ // interval to call `AllocateBlockedReg`.
+ static constexpr size_t ranges2[][2] = {{2, 4}};
+ LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two);
+ locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+
+ // Create an interval that will lead to splitting the first interval. The bug occured
+ // by splitting at a wrong position, in this case at the next intersection between
+ // this interval and the first interval. We would have then put the interval with ranges
+ // "[0, 2(, [4, 6(" in the list of handled intervals, even though we haven't processed intervals
+ // before lifetime position 6 yet.
+ static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
+ LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
+ third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_);
+ third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_);
+ third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+ locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+ third = third->SplitAt(3);
+
+ // Because the first part of the split interval was considered handled, this interval
+ // was free to allocate the same register, even though it conflicts with it.
+ static constexpr size_t ranges4[][2] = {{4, 6}};
+ LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four);
+ locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+
+ x86::CodeGeneratorX86 codegen(graph);
+ SsaLivenessAnalysis liveness(*graph, &codegen);
+
+ RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ register_allocator.unhandled_core_intervals_.Add(fourth);
+ register_allocator.unhandled_core_intervals_.Add(third);
+ register_allocator.unhandled_core_intervals_.Add(second);
+ register_allocator.unhandled_core_intervals_.Add(first);
+
+ // Set just one register available to make all intervals compete for the same.
+ register_allocator.number_of_registers_ = 1;
+ register_allocator.registers_array_ = allocator.AllocArray<size_t>(1);
+ register_allocator.processing_core_registers_ = true;
+ register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
+ register_allocator.LinearScan();
+
+ // Test that there is no conflicts between intervals.
+ GrowableArray<LiveInterval*> intervals(&allocator, 0);
+ intervals.Add(first);
+ intervals.Add(second);
+ intervals.Add(third);
+ intervals.Add(fourth);
+ ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+ intervals, 0, 0, codegen, &allocator, true, false));
+}
+
} // namespace art
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 74611e1cbb..b632c4d05a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -429,7 +429,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
LiveRange* current = first_range_;
LiveRange* previous = nullptr;
// Iterate over the ranges, and either find a range that covers this position, or
- // a two ranges in between this position (that is, the position is in a lifetime hole).
+ // two ranges in between this position (that is, the position is in a lifetime hole).
do {
if (position >= current->GetEnd()) {
// Move to next range.
@@ -653,6 +653,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
static constexpr int kNoRegister = -1;
static constexpr int kNoSpillSlot = -1;
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
DISALLOW_COPY_AND_ASSIGN(LiveInterval);
};